//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
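//
// How to read the definitions below (explanatory note): the first 32-bit
// slice of XMM0, for example, is declared further down as
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// i.e. save-on-call under both the VM and the C calling convention, spilled
// and reloaded as a Float (Op_RegF), with hardware encoding 0, and backed by
// the VMReg of xmm0. The remaining slices XMM0b .. XMM0p map to the next
// 32-bit words of the same 512-bit register via as_VMReg()->next(1) .. next(15).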
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
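
// Note: chunk1 above enumerates every 32-bit slice of the XMM file as an
// allocatable unit: XMM0-XMM7 always, plus XMM8-XMM31 on 64-bit. Whether the
// upper registers may actually be used is decided by the dynamic register
// classes below, not here.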

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
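
// Note: a reg_class_dynamic resolves to one of its two underlying classes
// depending on the runtime predicate, so float_reg covers XMM0-XMM31 when
// VM_Version::supports_evex() holds and XMM0-XMM15 otherwise. The _vl
// variant additionally requires AVX-512VL, which permits EVEX encodings
// (and hence the upper registers) at 128/256-bit vector lengths.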

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                           );

// Class for restricted 512bit vector registers
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
#endif
                             );

reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
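
// The singleton classes below (descriptive note) each expose one specific
// register at a given width: xmmN_reg covers the 128-bit slices a-d of XMMN,
// ymmN_reg the 256-bit slices a-h, and zmmN_reg all 512 bits, a-p. They are
// presumably for operands that must be pinned to a particular register.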
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1102 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1103 1104 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1105 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1106 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1107 1108 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1109 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1110 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1111 1112 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1113 reg_class ymm3_reg(XMM3, XMM3b, 
XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1114 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1115 1116 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1117 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1118 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1119 1120 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1121 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1122 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1123 1124 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1125 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1126 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1127 1128 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1129 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1130 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1131 1132 #ifdef _LP64 1133 1134 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1135 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1136 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1137 1138 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1139 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1140 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1141 1142 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1143 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1144 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1145 1146 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1147 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1148 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1149 1150 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1151 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1152 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1153 1154 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1155 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1156 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1157 1158 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1159 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1160 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1161 1162 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1163 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1164 reg_class zmm15_reg(XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1165 1166 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1167 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1168 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1169 1170 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1171 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1172 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1173 1174 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1175 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1176 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1177 1178 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1179 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1180 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1181 1182 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1183 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1184 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1185 1186 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1187 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1188 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1189 1190 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1191 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1192 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1193 1194 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1195 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1196 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1197 1198 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1199 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1200 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1201 1202 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1203 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1204 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1205 1206 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1207 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1208 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1209 1210 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1211 reg_class ymm27_reg(XMM27, 
XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h); 1212 reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p); 1213 1214 reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d); 1215 reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h); 1216 reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p); 1217 1218 reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d); 1219 reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h); 1220 reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p); 1221 1222 reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d); 1223 reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h); 1224 reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p); 1225 1226 reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d); 1227 reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); 1228 reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 1229 1230 #endif 1231 1232 %} 1233 1234 1235 //----------SOURCE BLOCK------------------------------------------------------- 1236 // This is a block of C++ code which provides values, functions, and 1237 // definitions necessary in the rest of the architecture description 1238 1239 source_hpp %{ 1240 // Header information of the source block. 1241 // Method declarations/definitions which are used outside 1242 // the ad-scope can conveniently be defined here. 1243 // 1244 // To keep related declarations/definitions/uses close together, 1245 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1246 1247 class NativeJump; 1248 1249 class CallStubImpl { 1250 1251 //-------------------------------------------------------------- 1252 //---< Used for optimization in Compile::shorten_branches >--- 1253 //-------------------------------------------------------------- 1254 1255 public: 1256 // Size of call trampoline stub. 1257 static uint size_call_trampoline() { 1258 return 0; // no call trampolines on this platform 1259 } 1260 1261 // number of relocations needed by a call trampoline stub 1262 static uint reloc_call_trampoline() { 1263 return 0; // no call trampolines on this platform 1264 } 1265 }; 1266 1267 class HandlerImpl { 1268 1269 public: 1270 1271 static int emit_exception_handler(CodeBuffer &cbuf); 1272 static int emit_deopt_handler(CodeBuffer& cbuf); 1273 1274 static uint size_exception_handler() { 1275 // NativeCall instruction size is the same as NativeJump. 1276 // exception handler starts out as jump and can be patched to 1277 // a call by deoptimization. (4932387) 1278 // Note that this value is also credited (in output.cpp) to 1279 // the size of the code section. 1280 return NativeJump::instruction_size; 1281 } 1282 1283 #ifdef _LP64 1284 static uint size_deopt_handler() { 1285 // three 5 byte instructions plus one move for unreachable address. 1286 return 15+3; 1287 } 1288 #else 1289 static uint size_deopt_handler() { 1290 // NativeCall instruction size is the same as NativeJump.
1291 // exception handler starts out as jump and can be patched to 1292 // a call by deoptimization. (4932387) 1293 // Note that this value is also credited (in output.cpp) to 1294 // the size of the code section. 1295 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1296 } 1297 #endif 1298 }; 1299 1300 %} // end source_hpp 1301 1302 source %{ 1303 1304 #include "opto/addnode.hpp" 1305 1306 // Emit exception handler code. 1307 // Stuff framesize into a register and call a VM stub routine. 1308 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1309 1310 // Note that the code buffer's insts_mark is always relative to insts. 1311 // That's why we must use the macroassembler to generate a handler. 1312 MacroAssembler _masm(&cbuf); 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == NULL) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 MacroAssembler _masm(&cbuf); 1331 address base = __ start_a_stub(size_deopt_handler()); 1332 if (base == NULL) { 1333 ciEnv::current()->record_failure("CodeCache is full"); 1334 return 0; // CodeBuffer::expand failed 1335 } 1336 int offset = __ offset(); 1337 1338 #ifdef _LP64 1339 address the_pc = (address) __ pc(); 1340 Label next; 1341 // push a "the_pc" on the stack without destroying any registers 1342 // as they all may be live. 1343 1344 // push address of "next" 1345 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1346 __ bind(next); 1347 // adjust it so it matches "the_pc" 1348 __ subptr(Address(rsp, 0), __ offset() - offset); 1349 #else 1350 InternalAddress here(__ pc()); 1351 __ pushptr(here.addr()); 1352 #endif 1353 1354 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1355 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1356 __ end_a_stub(); 1357 return offset; 1358 } 1359 1360 1361 //============================================================================= 1362 1363 // Float masks come from different places depending on platform.
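// The mask layouts can be read off the abs/neg instructs further below:
// signmask clears the IEEE-754 sign bit (0x7fffffff per float lane,
// 0x7fffffffffffffff per double lane) so that andps/andpd compute |x|,
// while signflip holds only the sign bit (0x80000000 / 0x8000000000000000)
// so that xorps/xorpd compute -x. A scalar C sketch of the same bit trick
// (fabs_by_mask is purely illustrative):
//
//   static float fabs_by_mask(float x) {
//     uint32_t bits;
//     memcpy(&bits, &x, sizeof(bits)); // type-pun without UB
//     bits &= 0x7fffffffu;             // clear the sign bit -> |x|
//     memcpy(&x, &bits, sizeof(x));
//     return x;
//   }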
1364 #ifdef _LP64 1365 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1366 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1367 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1368 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1369 #else 1370 static address float_signmask() { return (address)float_signmask_pool; } 1371 static address float_signflip() { return (address)float_signflip_pool; } 1372 static address double_signmask() { return (address)double_signmask_pool; } 1373 static address double_signflip() { return (address)double_signflip_pool; } 1374 #endif 1375 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1376 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1377 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1378 1379 //============================================================================= 1380 const bool Matcher::match_rule_supported(int opcode) { 1381 if (!has_match_rule(opcode)) 1382 return false; 1383 1384 bool ret_value = true; 1385 switch (opcode) { 1386 case Op_AbsVL: 1387 if (UseAVX < 3) 1388 ret_value = false; break; 1389 case Op_PopCountI: 1390 case Op_PopCountL: 1391 if (!UsePopCountInstruction) 1392 ret_value = false; 1393 break; 1394 case Op_PopCountVI: 1395 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq()) 1396 ret_value = false; 1397 break; 1398 case Op_MulVI: 1399 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1400 ret_value = false; 1401 break; 1402 case Op_MulVL: 1403 case Op_MulReductionVL: 1404 if (VM_Version::supports_avx512dq() == false) 1405 ret_value = false; 1406 break; 1407 case Op_AddReductionVL: 1408 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1409 ret_value = false; 1410 break; 1411 case Op_AbsVB: 1412 case Op_AbsVS: 1413 case Op_AbsVI: 1414 case Op_AddReductionVI: 1415 if (UseSSE < 3) // requires at least SSE3 1416 ret_value = false; 1417 break; 1418 case Op_MulReductionVI: 1419 if (UseSSE < 4) // requires at least SSE4 1420 ret_value = false; 1421 break; 1422 case Op_AddReductionVF: 1423 case Op_AddReductionVD: 1424 case Op_MulReductionVF: 1425 case Op_MulReductionVD: 1426 if (UseSSE < 1) // requires at least SSE 1427 ret_value = false; 1428 break; 1429 case Op_SqrtVD: 1430 case Op_SqrtVF: 1431 if (UseAVX < 1) // enabled for AVX only 1432 ret_value = false; 1433 break; 1434 case Op_CompareAndSwapL: 1435 #ifdef _LP64 1436 case Op_CompareAndSwapP: 1437 #endif 1438 if (!VM_Version::supports_cx8()) 1439 ret_value = false; 1440 break; 1441 case Op_CMoveVF: 1442 case Op_CMoveVD: 1443 if (UseAVX < 1 || UseAVX > 2) 1444 ret_value = false; 1445 break; 1446 case Op_StrIndexOf: 1447 if (!UseSSE42Intrinsics) 1448 ret_value = false; 1449 break; 1450 case Op_StrIndexOfChar: 1451 if (!UseSSE42Intrinsics) 1452 ret_value = false; 1453 break; 1454 case Op_OnSpinWait: 1455 if (VM_Version::supports_on_spin_wait() == false) 1456 ret_value = false; 1457 break; 1458 case Op_MulAddVS2VI: 1459 case Op_RShiftVL: 1460 case Op_AbsVD: 1461 case Op_NegVD: 1462 if (UseSSE < 2) 1463 ret_value = false; 1464 break; 1465 case Op_MulVB: 1466 case Op_LShiftVB: 1467 case Op_RShiftVB: 1468 case Op_URShiftVB: 1469 if (UseSSE < 4) 1470 ret_value = false; 1471 break; 1472 #ifdef _LP64 1473 case Op_MaxD: 1474 case Op_MaxF: 1475 case Op_MinD: 1476 case Op_MinF: 1477 if (UseAVX < 1) // enabled for AVX only 1478 ret_value = false; 1479 break; 1480 #endif 1481 } 1482 1483 return ret_value; // By default match rules are supported. 1484 } 1485
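// match_rule_supported() above only checks CPU features per opcode;
// match_rule_supported_vector() below additionally vets the vector
// length. For example, a 64-byte (512-bit) AddVB is rejected on EVEX
// machines without AVX512BW even though AddVB itself is supported.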
1486 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1487 // identify extra cases that we might want to provide match rules for 1488 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen 1489 bool ret_value = match_rule_supported(opcode); 1490 if (ret_value) { 1491 switch (opcode) { 1492 case Op_AbsVB: 1493 case Op_AddVB: 1494 case Op_SubVB: 1495 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1496 ret_value = false; 1497 break; 1498 case Op_AbsVS: 1499 case Op_AddVS: 1500 case Op_SubVS: 1501 case Op_MulVS: 1502 case Op_LShiftVS: 1503 case Op_RShiftVS: 1504 case Op_URShiftVS: 1505 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1506 ret_value = false; 1507 break; 1508 case Op_MulVB: 1509 case Op_LShiftVB: 1510 case Op_RShiftVB: 1511 case Op_URShiftVB: 1512 if ((vlen == 32 && UseAVX < 2) || 1513 ((vlen == 64) && (VM_Version::supports_avx512bw() == false))) 1514 ret_value = false; 1515 break; 1516 case Op_NegVF: 1517 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) 1518 ret_value = false; 1519 break; 1520 case Op_CMoveVF: 1521 if (vlen != 8) 1522 ret_value = false; 1523 break; 1524 case Op_NegVD: 1525 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) 1526 ret_value = false; 1527 break; 1528 case Op_CMoveVD: 1529 if (vlen != 4) 1530 ret_value = false; 1531 break; 1532 } 1533 } 1534 1535 return ret_value; // By default match rules are supported. 1536 } 1537 1538 const bool Matcher::has_predicated_vectors(void) { 1539 bool ret_value = false; 1540 if (UseAVX > 2) { 1541 ret_value = VM_Version::supports_avx512vl(); 1542 } 1543 1544 return ret_value; 1545 } 1546 1547 const int Matcher::float_pressure(int default_pressure_threshold) { 1548 int float_pressure_threshold = default_pressure_threshold; 1549 #ifdef _LP64 1550 if (UseAVX > 2) { 1551 // Increase pressure threshold on machines with AVX3 which have 1552 // 2x more XMM registers. 1553 float_pressure_threshold = default_pressure_threshold * 2; 1554 } 1555 #endif 1556 return float_pressure_threshold; 1557 } 1558 1559 // Max vector size in bytes. 0 if not supported. 1560 const int Matcher::vector_width_in_bytes(BasicType bt) { 1561 assert(is_java_primitive(bt), "only primitive type vectors"); 1562 if (UseSSE < 2) return 0; 1563 // SSE2 supports 128bit vectors for all types. 1564 // AVX2 supports 256bit vectors for all types. 1565 // EVEX supports 512bit vectors for all types. 1566 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1567 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1568 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1569 size = (UseAVX > 2) ? 64 : 32; 1570 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 1571 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 1572 // Use flag to limit vector size. 1573 size = MIN2(size,(int)MaxVectorSize); 1574 // Minimum 2 values in vector (or 4 for bytes).
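// For example, with UseAVX == 2 and MaxVectorSize >= 32 the computed
// size for T_INT is 32 bytes (an 8-element vector); with only SSE2 it
// is 16 bytes (4 elements). The switch below then rejects widths that
// cannot hold the minimum element count, e.g. a T_LONG vector needs at
// least 16 bytes (2 longs).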
1575 switch (bt) { 1576 case T_DOUBLE: 1577 case T_LONG: 1578 if (size < 16) return 0; 1579 break; 1580 case T_FLOAT: 1581 case T_INT: 1582 if (size < 8) return 0; 1583 break; 1584 case T_BOOLEAN: 1585 if (size < 4) return 0; 1586 break; 1587 case T_CHAR: 1588 if (size < 4) return 0; 1589 break; 1590 case T_BYTE: 1591 if (size < 4) return 0; 1592 break; 1593 case T_SHORT: 1594 if (size < 4) return 0; 1595 break; 1596 default: 1597 ShouldNotReachHere(); 1598 } 1599 return size; 1600 } 1601 1602 // Limits on vector size (number of elements) loaded into vector. 1603 const int Matcher::max_vector_size(const BasicType bt) { 1604 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1605 } 1606 const int Matcher::min_vector_size(const BasicType bt) { 1607 int max_size = max_vector_size(bt); 1608 // Min size which can be loaded into vector is 4 bytes. 1609 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1610 return MIN2(size,max_size); 1611 } 1612 1613 // Vector ideal reg corresponding to specified size in bytes 1614 const uint Matcher::vector_ideal_reg(int size) { 1615 assert(MaxVectorSize >= size, ""); 1616 switch(size) { 1617 case 4: return Op_VecS; 1618 case 8: return Op_VecD; 1619 case 16: return Op_VecX; 1620 case 32: return Op_VecY; 1621 case 64: return Op_VecZ; 1622 } 1623 ShouldNotReachHere(); 1624 return 0; 1625 } 1626 1627 // Only lowest bits of xmm reg are used for vector shift count. 1628 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1629 return Op_VecS; 1630 } 1631 1632 // x86 supports misaligned vectors store/load. 1633 const bool Matcher::misaligned_vectors_ok() { 1634 return true; 1635 } 1636 1637 // x86 AES instructions are compatible with SunJCE expanded 1638 // keys, hence we do not need to pass the original key to stubs 1639 const bool Matcher::pass_original_key_for_aes() { 1640 return false; 1641 } 1642 1643 1644 const bool Matcher::convi2l_type_required = true; 1645 1646 // Check for shift by small constant as well 1647 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1648 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1649 shift->in(2)->get_int() <= 3 && 1650 // Are there other uses besides address expressions? 1651 !matcher->is_visited(shift)) { 1652 address_visited.set(shift->_idx); // Flag as address_visited 1653 mstack.push(shift->in(2), Matcher::Visit); 1654 Node *conv = shift->in(1); 1655 #ifdef _LP64 1656 // Allow Matcher to match the rule which bypass 1657 // ConvI2L operation for an array index on LP64 1658 // if the index value is positive. 1659 if (conv->Opcode() == Op_ConvI2L && 1660 conv->as_Type()->type()->is_long()->_lo >= 0 && 1661 // Are there other uses besides address expressions? 1662 !matcher->is_visited(conv)) { 1663 address_visited.set(conv->_idx); // Flag as address_visited 1664 mstack.push(conv->in(1), Matcher::Pre_Visit); 1665 } else 1666 #endif 1667 mstack.push(conv, Matcher::Pre_Visit); 1668 return true; 1669 } 1670 return false; 1671 } 1672 1673 // Should the Matcher clone shifts on addressing modes, expecting them 1674 // to be subsumed into complex addressing expressions or compute them 1675 // into registers? 
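// For example, an address computed as base + (i << 2) + disp can be
// subsumed into a single [base + i*4 + disp] operand, so cloning the
// shift per use is cheaper than materializing it in a register;
// clone_shift() above only accepts shift counts <= 3, matching the
// 1/2/4/8 scale factors of x86 addressing modes.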
1676 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1677 Node *off = m->in(AddPNode::Offset); 1678 if (off->is_Con()) { 1679 address_visited.test_set(m->_idx); // Flag as address_visited 1680 Node *adr = m->in(AddPNode::Address); 1681 1682 // Intel can handle 2 adds in addressing mode 1683 // AtomicAdd is not an addressing expression. 1684 // Cheap to find it by looking for screwy base. 1685 if (adr->is_AddP() && 1686 !adr->in(AddPNode::Base)->is_top() && 1687 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 1688 // Are there other uses besides address expressions? 1689 !is_visited(adr)) { 1690 address_visited.set(adr->_idx); // Flag as address_visited 1691 Node *shift = adr->in(AddPNode::Offset); 1692 if (!clone_shift(shift, this, mstack, address_visited)) { 1693 mstack.push(shift, Pre_Visit); 1694 } 1695 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1696 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1697 } else { 1698 mstack.push(adr, Pre_Visit); 1699 } 1700 1701 // Clone X+offset as it also folds into most addressing expressions 1702 mstack.push(off, Visit); 1703 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1704 return true; 1705 } else if (clone_shift(off, this, mstack, address_visited)) { 1706 address_visited.test_set(m->_idx); // Flag as address_visited 1707 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1708 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1709 return true; 1710 } 1711 return false; 1712 } 1713 1714 void Compile::reshape_address(AddPNode* addp) { 1715 } 1716 1717 // Helper methods for MachSpillCopyNode::implementation(). 1718 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1719 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1720 // In 64-bit VM size calculation is very complex. Emitting instructions 1721 // into scratch buffer is used to get size in 64-bit VM. 
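// When cbuf is non-NULL the move is emitted and its size measured
// directly; with cbuf == NULL the helper only computes (and, for
// !do_size, prints) the size from the encoding rules at the end.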
1722 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1723 assert(ireg == Op_VecS || // 32bit vector 1724 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1725 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1726 "no non-adjacent vector moves" ); 1727 if (cbuf) { 1728 MacroAssembler _masm(cbuf); 1729 int offset = __ offset(); 1730 switch (ireg) { 1731 case Op_VecS: // copy whole register 1732 case Op_VecD: 1733 case Op_VecX: 1734 #ifndef _LP64 1735 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1736 #else 1737 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1738 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1739 } else { 1740 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1741 } 1742 #endif 1743 break; 1744 case Op_VecY: 1745 #ifndef _LP64 1746 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1747 #else 1748 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1749 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1750 } else { 1751 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1752 } 1753 #endif 1754 break; 1755 case Op_VecZ: 1756 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1757 break; 1758 default: 1759 ShouldNotReachHere(); 1760 } 1761 int size = __ offset() - offset; 1762 #ifdef ASSERT 1763 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1764 assert(!do_size || size == 4, "incorrect size calculation"); 1765 #endif 1766 return size; 1767 #ifndef PRODUCT 1768 } else if (!do_size) { 1769 switch (ireg) { 1770 case Op_VecS: 1771 case Op_VecD: 1772 case Op_VecX: 1773 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1774 break; 1775 case Op_VecY: 1776 case Op_VecZ: 1777 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1778 break; 1779 default: 1780 ShouldNotReachHere(); 1781 } 1782 #endif 1783 } 1784 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1785 return (UseAVX > 2) ? 6 : 4; 1786 } 1787 1788 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1789 int stack_offset, int reg, uint ireg, outputStream* st) { 1790 // In 64-bit VM size calculation is very complex. Emitting instructions 1791 // into scratch buffer is used to get size in 64-bit VM.
1792 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1793 if (cbuf) { 1794 MacroAssembler _masm(cbuf); 1795 int offset = __ offset(); 1796 if (is_load) { 1797 switch (ireg) { 1798 case Op_VecS: 1799 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1800 break; 1801 case Op_VecD: 1802 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1803 break; 1804 case Op_VecX: 1805 #ifndef _LP64 1806 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1807 #else 1808 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1809 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1810 } else { 1811 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1812 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1813 } 1814 #endif 1815 break; 1816 case Op_VecY: 1817 #ifndef _LP64 1818 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1819 #else 1820 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1821 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1822 } else { 1823 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1824 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1825 } 1826 #endif 1827 break; 1828 case Op_VecZ: 1829 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1830 break; 1831 default: 1832 ShouldNotReachHere(); 1833 } 1834 } else { // store 1835 switch (ireg) { 1836 case Op_VecS: 1837 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1838 break; 1839 case Op_VecD: 1840 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1841 break; 1842 case Op_VecX: 1843 #ifndef _LP64 1844 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1845 #else 1846 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1847 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1848 } 1849 else { 1850 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1851 } 1852 #endif 1853 break; 1854 case Op_VecY: 1855 #ifndef _LP64 1856 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1857 #else 1858 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1859 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1860 } 1861 else { 1862 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1863 } 1864 #endif 1865 break; 1866 case Op_VecZ: 1867 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1868 break; 1869 default: 1870 ShouldNotReachHere(); 1871 } 1872 } 1873 int size = __ offset() - offset; 1874 #ifdef ASSERT 1875 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1876 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
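// offset_size models the ModRM displacement width: no displacement byte
// for a zero offset, one byte when the offset fits in a disp8, otherwise
// a 4-byte disp32 (plus two extra prefix bytes under an EVEX encoding).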
assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1878 #endif 1879 return size; 1880 #ifndef PRODUCT 1881 } else if (!do_size) { 1882 if (is_load) { 1883 switch (ireg) { 1884 case Op_VecS: 1885 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1886 break; 1887 case Op_VecD: 1888 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1889 break; 1890 case Op_VecX: 1891 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1892 break; 1893 case Op_VecY: 1894 case Op_VecZ: 1895 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1896 break; 1897 default: 1898 ShouldNotReachHere(); 1899 } 1900 } else { // store 1901 switch (ireg) { 1902 case Op_VecS: 1903 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1904 break; 1905 case Op_VecD: 1906 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1907 break; 1908 case Op_VecX: 1909 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1910 break; 1911 case Op_VecY: 1912 case Op_VecZ: 1913 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1914 break; 1915 default: 1916 ShouldNotReachHere(); 1917 } 1918 } 1919 #endif 1920 } 1921 bool is_single_byte = false; 1922 int vec_len = 0; 1923 if ((UseAVX > 2) && (stack_offset != 0)) { 1924 int tuple_type = Assembler::EVEX_FVM; 1925 int input_size = Assembler::EVEX_32bit; 1926 switch (ireg) { 1927 case Op_VecS: 1928 tuple_type = Assembler::EVEX_T1S; 1929 break; 1930 case Op_VecD: 1931 tuple_type = Assembler::EVEX_T1S; 1932 input_size = Assembler::EVEX_64bit; 1933 break; 1934 case Op_VecX: 1935 break; 1936 case Op_VecY: 1937 vec_len = 1; 1938 break; 1939 case Op_VecZ: 1940 vec_len = 2; 1941 break; 1942 } 1943 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 1944 } 1945 int offset_size = 0; 1946 int size = 5; 1947 if (UseAVX > 2) { 1948 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 1949 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1950 size += 2; // Need an additional two bytes for EVEX encoding 1951 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 1952 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1953 } else { 1954 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1955 size += 2; // Need an additional two bytes for EVEX encoding 1956 } 1957 } else { 1958 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1959 } 1960 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1961 return size+offset_size; 1962 } 1963 1964 static inline jint replicate4_imm(int con, int width) { 1965 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1966 assert(width == 1 || width == 2, "only byte or short types here"); 1967 int bit_width = width * 8; 1968 jint val = con; 1969 val &= (1 << bit_width) - 1; // mask off sign bits 1970 while(bit_width < 32) { 1971 val |= (val << bit_width); 1972 bit_width <<= 1; 1973 } 1974 return val; 1975 } 1976 1977 static inline jlong replicate8_imm(int con, int width) { 1978 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
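// For example, replicate4_imm(0xAB, 1) widens 0xAB -> 0xABAB ->
// 0xABABABAB; this variant doubles once more, so replicate8_imm(0xAB, 1)
// yields 0xABABABABABABABAB and replicate8_imm(0x1234, 2) yields
// 0x1234123412341234.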
assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1980 int bit_width = width * 8; 1981 jlong val = con; 1982 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1983 while(bit_width < 64) { 1984 val |= (val << bit_width); 1985 bit_width <<= 1; 1986 } 1987 return val; 1988 } 1989 1990 #ifndef PRODUCT 1991 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1992 st->print("nop \t# %d bytes pad for loops and calls", _count); 1993 } 1994 #endif 1995 1996 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1997 MacroAssembler _masm(&cbuf); 1998 __ nop(_count); 1999 } 2000 2001 uint MachNopNode::size(PhaseRegAlloc*) const { 2002 return _count; 2003 } 2004 2005 #ifndef PRODUCT 2006 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2007 st->print("# breakpoint"); 2008 } 2009 #endif 2010 2011 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2012 MacroAssembler _masm(&cbuf); 2013 __ int3(); 2014 } 2015 2016 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2017 return MachNode::size(ra_); 2018 } 2019 2020 %} 2021 2022 encode %{ 2023 2024 enc_class call_epilog %{ 2025 if (VerifyStackAtCalls) { 2026 // Check that stack depth is unchanged: find majik cookie on stack 2027 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2028 MacroAssembler _masm(&cbuf); 2029 Label L; 2030 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2031 __ jccb(Assembler::equal, L); 2032 // Die if stack mismatch 2033 __ int3(); 2034 __ bind(L); 2035 } 2036 %} 2037 2038 %} 2039 2040 2041 //----------OPERANDS----------------------------------------------------------- 2042 // Operand definitions must precede instruction definitions for correct parsing 2043 // in the ADLC because operands constitute user-defined types which are used in 2044 // instruction definitions. 2045 2046 operand vecZ() %{ 2047 constraint(ALLOC_IN_RC(vectorz_reg)); 2048 match(VecZ); 2049 2050 format %{ %} 2051 interface(REG_INTER); 2052 %} 2053 2054 operand legVecZ() %{ 2055 constraint(ALLOC_IN_RC(vectorz_reg_vl)); 2056 match(VecZ); 2057 2058 format %{ %} 2059 interface(REG_INTER); 2060 %} 2061 2062 // Comparison Code for FP conditional move 2063 operand cmpOp_vcmppd() %{ 2064 match(Bool); 2065 2066 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2067 n->as_Bool()->_test._test != BoolTest::no_overflow); 2068 format %{ "" %} 2069 interface(COND_INTER) %{ 2070 equal (0x0, "eq"); 2071 less (0x1, "lt"); 2072 less_equal (0x2, "le"); 2073 not_equal (0xC, "ne"); 2074 greater_equal(0xD, "ge"); 2075 greater (0xE, "gt"); 2076 //TODO cannot compile (adlc breaks) without the next two lines, with error: 2077 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2078 // equal' for overflow.
2079 overflow (0x20, "o"); // not really supported by the instruction 2080 no_overflow (0x21, "no"); // not really supported by the instruction 2081 %} 2082 %} 2083 2084 2085 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2086 2087 // ============================================================================ 2088 2089 instruct ShouldNotReachHere() %{ 2090 match(Halt); 2091 format %{ "ud2\t# ShouldNotReachHere" %} 2092 ins_encode %{ 2093 __ ud2(); 2094 %} 2095 ins_pipe(pipe_slow); 2096 %} 2097 2098 // =================================EVEX special=============================== 2099 2100 instruct setMask(rRegI dst, rRegI src) %{ 2101 predicate(Matcher::has_predicated_vectors()); 2102 match(Set dst (SetVectMaskI src)); 2103 effect(TEMP dst); 2104 format %{ "setvectmask $dst, $src" %} 2105 ins_encode %{ 2106 __ setvectmask($dst$$Register, $src$$Register); 2107 %} 2108 ins_pipe(pipe_slow); 2109 %} 2110 2111 // ============================================================================ 2112 2113 instruct addF_reg(regF dst, regF src) %{ 2114 predicate((UseSSE>=1) && (UseAVX == 0)); 2115 match(Set dst (AddF dst src)); 2116 2117 format %{ "addss $dst, $src" %} 2118 ins_cost(150); 2119 ins_encode %{ 2120 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2121 %} 2122 ins_pipe(pipe_slow); 2123 %} 2124 2125 instruct addF_mem(regF dst, memory src) %{ 2126 predicate((UseSSE>=1) && (UseAVX == 0)); 2127 match(Set dst (AddF dst (LoadF src))); 2128 2129 format %{ "addss $dst, $src" %} 2130 ins_cost(150); 2131 ins_encode %{ 2132 __ addss($dst$$XMMRegister, $src$$Address); 2133 %} 2134 ins_pipe(pipe_slow); 2135 %} 2136 2137 instruct addF_imm(regF dst, immF con) %{ 2138 predicate((UseSSE>=1) && (UseAVX == 0)); 2139 match(Set dst (AddF dst con)); 2140 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2141 ins_cost(150); 2142 ins_encode %{ 2143 __ addss($dst$$XMMRegister, $constantaddress($con)); 2144 %} 2145 ins_pipe(pipe_slow); 2146 %} 2147 2148 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2149 predicate(UseAVX > 0); 2150 match(Set dst (AddF src1 src2)); 2151 2152 format %{ "vaddss $dst, $src1, $src2" %} 2153 ins_cost(150); 2154 ins_encode %{ 2155 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2156 %} 2157 ins_pipe(pipe_slow); 2158 %} 2159 2160 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2161 predicate(UseAVX > 0); 2162 match(Set dst (AddF src1 (LoadF src2))); 2163 2164 format %{ "vaddss $dst, $src1, $src2" %} 2165 ins_cost(150); 2166 ins_encode %{ 2167 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2168 %} 2169 ins_pipe(pipe_slow); 2170 %} 2171 2172 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2173 predicate(UseAVX > 0); 2174 match(Set dst (AddF src con)); 2175 2176 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2177 ins_cost(150); 2178 ins_encode %{ 2179 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2180 %} 2181 ins_pipe(pipe_slow); 2182 %} 2183 2184 instruct addD_reg(regD dst, regD src) %{ 2185 predicate((UseSSE>=2) && (UseAVX == 0)); 2186 match(Set dst (AddD dst src)); 2187 2188 format %{ "addsd $dst, $src" %} 2189 ins_cost(150); 2190 ins_encode %{ 2191 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2192 %} 2193 ins_pipe(pipe_slow); 2194 %} 2195 2196 instruct addD_mem(regD dst, memory src) %{ 2197 predicate((UseSSE>=2) && (UseAVX == 0)); 2198 match(Set dst (AddD dst (LoadD src))); 2199 2200 
format %{ "addsd $dst, $src" %} 2201 ins_cost(150); 2202 ins_encode %{ 2203 __ addsd($dst$$XMMRegister, $src$$Address); 2204 %} 2205 ins_pipe(pipe_slow); 2206 %} 2207 2208 instruct addD_imm(regD dst, immD con) %{ 2209 predicate((UseSSE>=2) && (UseAVX == 0)); 2210 match(Set dst (AddD dst con)); 2211 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2212 ins_cost(150); 2213 ins_encode %{ 2214 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2215 %} 2216 ins_pipe(pipe_slow); 2217 %} 2218 2219 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2220 predicate(UseAVX > 0); 2221 match(Set dst (AddD src1 src2)); 2222 2223 format %{ "vaddsd $dst, $src1, $src2" %} 2224 ins_cost(150); 2225 ins_encode %{ 2226 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2227 %} 2228 ins_pipe(pipe_slow); 2229 %} 2230 2231 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2232 predicate(UseAVX > 0); 2233 match(Set dst (AddD src1 (LoadD src2))); 2234 2235 format %{ "vaddsd $dst, $src1, $src2" %} 2236 ins_cost(150); 2237 ins_encode %{ 2238 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2239 %} 2240 ins_pipe(pipe_slow); 2241 %} 2242 2243 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2244 predicate(UseAVX > 0); 2245 match(Set dst (AddD src con)); 2246 2247 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2248 ins_cost(150); 2249 ins_encode %{ 2250 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2251 %} 2252 ins_pipe(pipe_slow); 2253 %} 2254 2255 instruct subF_reg(regF dst, regF src) %{ 2256 predicate((UseSSE>=1) && (UseAVX == 0)); 2257 match(Set dst (SubF dst src)); 2258 2259 format %{ "subss $dst, $src" %} 2260 ins_cost(150); 2261 ins_encode %{ 2262 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2263 %} 2264 ins_pipe(pipe_slow); 2265 %} 2266 2267 instruct subF_mem(regF dst, memory src) %{ 2268 predicate((UseSSE>=1) && (UseAVX == 0)); 2269 match(Set dst (SubF dst (LoadF src))); 2270 2271 format %{ "subss $dst, $src" %} 2272 ins_cost(150); 2273 ins_encode %{ 2274 __ subss($dst$$XMMRegister, $src$$Address); 2275 %} 2276 ins_pipe(pipe_slow); 2277 %} 2278 2279 instruct subF_imm(regF dst, immF con) %{ 2280 predicate((UseSSE>=1) && (UseAVX == 0)); 2281 match(Set dst (SubF dst con)); 2282 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2283 ins_cost(150); 2284 ins_encode %{ 2285 __ subss($dst$$XMMRegister, $constantaddress($con)); 2286 %} 2287 ins_pipe(pipe_slow); 2288 %} 2289 2290 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2291 predicate(UseAVX > 0); 2292 match(Set dst (SubF src1 src2)); 2293 2294 format %{ "vsubss $dst, $src1, $src2" %} 2295 ins_cost(150); 2296 ins_encode %{ 2297 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2298 %} 2299 ins_pipe(pipe_slow); 2300 %} 2301 2302 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2303 predicate(UseAVX > 0); 2304 match(Set dst (SubF src1 (LoadF src2))); 2305 2306 format %{ "vsubss $dst, $src1, $src2" %} 2307 ins_cost(150); 2308 ins_encode %{ 2309 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2310 %} 2311 ins_pipe(pipe_slow); 2312 %} 2313 2314 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2315 predicate(UseAVX > 0); 2316 match(Set dst (SubF src con)); 2317 2318 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2319 ins_cost(150); 2320 ins_encode %{ 2321 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2322 %} 2323 ins_pipe(pipe_slow); 2324 %} 2325 2326 instruct subD_reg(regD dst, regD src) %{ 2327 predicate((UseSSE>=2) && (UseAVX == 0)); 2328 match(Set dst (SubD dst src)); 2329 2330 format %{ "subsd $dst, $src" %} 2331 ins_cost(150); 2332 ins_encode %{ 2333 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2334 %} 2335 ins_pipe(pipe_slow); 2336 %} 2337 2338 instruct subD_mem(regD dst, memory src) %{ 2339 predicate((UseSSE>=2) && (UseAVX == 0)); 2340 match(Set dst (SubD dst (LoadD src))); 2341 2342 format %{ "subsd $dst, $src" %} 2343 ins_cost(150); 2344 ins_encode %{ 2345 __ subsd($dst$$XMMRegister, $src$$Address); 2346 %} 2347 ins_pipe(pipe_slow); 2348 %} 2349 2350 instruct subD_imm(regD dst, immD con) %{ 2351 predicate((UseSSE>=2) && (UseAVX == 0)); 2352 match(Set dst (SubD dst con)); 2353 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2354 ins_cost(150); 2355 ins_encode %{ 2356 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2357 %} 2358 ins_pipe(pipe_slow); 2359 %} 2360 2361 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2362 predicate(UseAVX > 0); 2363 match(Set dst (SubD src1 src2)); 2364 2365 format %{ "vsubsd $dst, $src1, $src2" %} 2366 ins_cost(150); 2367 ins_encode %{ 2368 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2374 predicate(UseAVX > 0); 2375 match(Set dst (SubD src1 (LoadD src2))); 2376 2377 format %{ "vsubsd $dst, $src1, $src2" %} 2378 ins_cost(150); 2379 ins_encode %{ 2380 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2381 %} 2382 ins_pipe(pipe_slow); 2383 %} 2384 2385 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2386 predicate(UseAVX > 0); 2387 match(Set dst (SubD src con)); 2388 2389 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2390 ins_cost(150); 2391 ins_encode %{ 2392 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2393 %} 2394 ins_pipe(pipe_slow); 2395 %} 2396 2397 instruct mulF_reg(regF dst, regF src) %{ 2398 predicate((UseSSE>=1) && (UseAVX == 0)); 2399 match(Set dst (MulF dst src)); 2400 2401 format %{ "mulss $dst, $src" %} 2402 ins_cost(150); 2403 ins_encode %{ 2404 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2405 %} 2406 ins_pipe(pipe_slow); 2407 %} 2408 2409 instruct mulF_mem(regF dst, memory src) %{ 2410 predicate((UseSSE>=1) && (UseAVX == 0)); 2411 match(Set dst (MulF dst (LoadF src))); 2412 2413 format %{ "mulss $dst, $src" %} 2414 ins_cost(150); 2415 ins_encode %{ 2416 __ mulss($dst$$XMMRegister, $src$$Address); 2417 %} 2418 ins_pipe(pipe_slow); 2419 %} 2420 2421 instruct mulF_imm(regF dst, immF con) %{ 2422 predicate((UseSSE>=1) && (UseAVX == 0)); 2423 match(Set dst (MulF dst con)); 2424 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2425 ins_cost(150); 2426 ins_encode %{ 2427 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2428 %} 2429 ins_pipe(pipe_slow); 2430 %} 2431 2432 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2433 predicate(UseAVX > 0); 2434 match(Set dst (MulF src1 src2)); 2435 2436 format %{ "vmulss $dst, $src1, $src2" %} 2437 ins_cost(150); 2438 ins_encode %{ 2439 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2440 %} 2441 ins_pipe(pipe_slow); 2442 %} 2443 2444 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2445 predicate(UseAVX > 0); 2446 match(Set dst (MulF src1 (LoadF src2))); 2447 2448 format %{ "vmulss $dst, $src1, $src2" %} 2449 ins_cost(150); 2450 ins_encode %{ 2451 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2452 %} 2453 ins_pipe(pipe_slow); 2454 %} 2455 2456 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2457 predicate(UseAVX > 0); 2458 match(Set dst (MulF src con)); 2459 2460 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2461 ins_cost(150); 2462 ins_encode %{ 2463 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2464 %} 2465 ins_pipe(pipe_slow); 2466 %} 2467 2468 instruct mulD_reg(regD dst, regD src) %{ 2469 predicate((UseSSE>=2) && (UseAVX == 0)); 2470 match(Set dst (MulD dst src)); 2471 2472 format %{ "mulsd $dst, $src" %} 2473 ins_cost(150); 2474 ins_encode %{ 2475 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2476 %} 2477 ins_pipe(pipe_slow); 2478 %} 2479 2480 instruct mulD_mem(regD dst, memory src) %{ 2481 predicate((UseSSE>=2) && (UseAVX == 0)); 2482 match(Set dst (MulD dst (LoadD src))); 2483 2484 format %{ "mulsd $dst, $src" %} 2485 ins_cost(150); 2486 ins_encode %{ 2487 __ mulsd($dst$$XMMRegister, $src$$Address); 2488 %} 2489 ins_pipe(pipe_slow); 2490 %} 2491 2492 instruct mulD_imm(regD dst, immD con) %{ 2493 predicate((UseSSE>=2) && (UseAVX == 0)); 2494 match(Set dst (MulD dst con)); 2495 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2496 ins_cost(150); 2497 ins_encode %{ 2498 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2499 %} 2500 ins_pipe(pipe_slow); 2501 %} 2502 2503 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2504 predicate(UseAVX > 0); 2505 match(Set dst (MulD src1 src2)); 2506 2507 format %{ "vmulsd $dst, $src1, $src2" %} 2508 ins_cost(150); 2509 ins_encode %{ 2510 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2511 %} 2512 ins_pipe(pipe_slow); 2513 %} 2514 2515 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2516 predicate(UseAVX > 0); 2517 match(Set dst (MulD src1 (LoadD src2))); 2518 2519 format %{ "vmulsd $dst, $src1, $src2" %} 2520 ins_cost(150); 2521 ins_encode %{ 2522 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2523 %} 2524 ins_pipe(pipe_slow); 2525 %} 2526 2527 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2528 predicate(UseAVX > 0); 2529 match(Set dst (MulD src con)); 2530 2531 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2532 ins_cost(150); 2533 ins_encode %{ 2534 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2535 %} 2536 ins_pipe(pipe_slow); 2537 %} 2538 2539 instruct divF_reg(regF dst, regF src) %{ 2540 predicate((UseSSE>=1) && (UseAVX == 0)); 2541 match(Set dst (DivF dst src)); 2542 2543 format %{ "divss $dst, $src" %} 2544 ins_cost(150); 2545 ins_encode %{ 2546 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2547 %} 2548 ins_pipe(pipe_slow); 2549 %} 2550 2551 instruct divF_mem(regF dst, memory src) %{ 2552 predicate((UseSSE>=1) && (UseAVX == 0)); 2553 match(Set dst (DivF dst (LoadF src))); 2554 2555 format %{ "divss $dst, $src" %} 2556 ins_cost(150); 2557 ins_encode %{ 2558 __ divss($dst$$XMMRegister, $src$$Address); 2559 %} 2560 ins_pipe(pipe_slow); 2561 %} 2562 2563 instruct divF_imm(regF dst, immF con) %{ 2564 predicate((UseSSE>=1) && (UseAVX == 0)); 2565 match(Set dst (DivF dst con)); 2566 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2567 ins_cost(150); 2568 ins_encode %{ 2569 __ divss($dst$$XMMRegister, $constantaddress($con)); 2570 %} 2571 ins_pipe(pipe_slow); 2572 %} 2573 2574 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2575 predicate(UseAVX > 0); 2576 match(Set dst (DivF src1 src2)); 2577 2578 format %{ "vdivss $dst, $src1, $src2" %} 2579 ins_cost(150); 2580 ins_encode %{ 2581 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2582 %} 2583 ins_pipe(pipe_slow); 2584 %} 2585 2586 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2587 predicate(UseAVX > 0); 2588 match(Set dst (DivF src1 (LoadF src2))); 2589 2590 format %{ "vdivss $dst, $src1, $src2" %} 2591 ins_cost(150); 2592 ins_encode %{ 2593 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2594 %} 2595 ins_pipe(pipe_slow); 2596 %} 2597 2598 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2599 predicate(UseAVX > 0); 2600 match(Set dst (DivF src con)); 2601 2602 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2603 ins_cost(150); 2604 ins_encode %{ 2605 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2606 %} 2607 ins_pipe(pipe_slow); 2608 %} 2609 2610 instruct divD_reg(regD dst, regD src) %{ 2611 predicate((UseSSE>=2) && (UseAVX == 0)); 2612 match(Set dst (DivD dst src)); 2613 2614 format %{ "divsd $dst, $src" %} 2615 ins_cost(150); 2616 ins_encode %{ 2617 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2618 %} 2619 ins_pipe(pipe_slow); 2620 %} 2621 2622 instruct divD_mem(regD dst, memory src) %{ 2623 predicate((UseSSE>=2) && (UseAVX == 0)); 2624 match(Set dst (DivD dst (LoadD src))); 2625 2626 format %{ "divsd $dst, $src" %} 2627 ins_cost(150); 2628 ins_encode %{ 2629 __ divsd($dst$$XMMRegister, $src$$Address); 2630 %} 2631 ins_pipe(pipe_slow); 2632 %} 2633 2634 instruct divD_imm(regD dst, immD con) %{ 2635 predicate((UseSSE>=2) && (UseAVX == 0)); 2636 match(Set dst (DivD dst con)); 2637 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2638 ins_cost(150); 2639 ins_encode %{ 2640 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2641 %} 2642 ins_pipe(pipe_slow); 2643 %} 2644 2645 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2646 predicate(UseAVX > 0); 2647 match(Set dst (DivD src1 src2)); 2648 2649 format %{ "vdivsd $dst, $src1, $src2" %} 2650 ins_cost(150); 2651 ins_encode %{ 2652 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2653 %} 2654 ins_pipe(pipe_slow); 2655 %} 2656 2657 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2658 predicate(UseAVX > 0); 2659 match(Set dst (DivD src1 (LoadD src2))); 2660 2661 format %{ "vdivsd $dst, $src1, $src2" %} 2662 ins_cost(150); 2663 ins_encode %{ 2664 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2665 %} 2666 ins_pipe(pipe_slow); 2667 %} 2668 2669 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2670 predicate(UseAVX > 0); 2671 match(Set dst (DivD src con)); 2672 2673 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2674 ins_cost(150); 2675 ins_encode %{ 2676 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2677 %} 2678 ins_pipe(pipe_slow); 2679 %} 2680 2681 instruct absF_reg(regF dst) %{ 2682 predicate((UseSSE>=1) && (UseAVX == 0)); 2683 match(Set dst (AbsF dst)); 2684 ins_cost(150); 2685 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
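// The abs/neg encodings above never raise floating-point exceptions: abs
// ANDs each lane with a sign-mask constant that keeps everything except the
// IEEE-754 sign bit, and neg XORs with a constant holding only the sign bit:
//   0xC1200000 (-10.0f) & 0x7fffffff = 0x41200000 (10.0f)
//   0x41200000 ( 10.0f) ^ 0x80000000 = 0xC1200000 (-10.0f)
// float_signmask()/float_signflip() and the double variants return the
// addresses of those in-memory constants.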
instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
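// The FMA forms compute a * b + c with a single rounding step, which is
// exactly what Math.fma() requires; that is why they are gated on UseFMA
// and matched from FmaD/FmaF nodes rather than rewriting Mul/Add pairs:
//   double r = Math.fma(a, b, c);   // one rounding
//   double s = a * b + c;           // two roundings, may differ in the ulp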
// ====================VECTOR INSTRUCTIONS=====================================


// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (4 bytes long)
instruct MoveVecS2Leg(legVecS dst, vecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (4 bytes long)
instruct MoveLeg2VecS(vecS dst, legVecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (8 bytes long)
instruct MoveVecD2Leg(legVecD dst, vecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (8 bytes long)
instruct MoveLeg2VecD(vecD dst, legVecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (16 bytes long)
instruct MoveVecX2Leg(legVecX dst, vecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (16 bytes long)
instruct MoveLeg2VecX(vecX dst, legVecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
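// Throughout this file vector_len follows the Assembler::AvxVectorLen
// encoding: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit. The moves above fall back
// to a full 512-bit evmovdquq when AVX-512 is present without the VL
// extension, because registers xmm16-31 are only reachable with an EVEX
// encoding, and without VL the only EVEX vector length is 512 bits.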
// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (32 bytes long)
instruct MoveVecY2Leg(legVecY dst, vecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (32 bytes long)
instruct MoveLeg2VecY(vecY dst, legVecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
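// loadV64_dword/_qword split on element size because evmovdqul/evmovdquq are
// the 32-bit and 64-bit element forms (vmovdqu32/vmovdqu64). With k0, i.e.
// no write masking (which is what "$dst k0" in the formats indicates), both
// simply move all 64 bytes; the element width would only matter for masked
// variants.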
// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE========================================

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B(legVecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
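// The register-source byte replicates above widen one byte step by step.
// For example, with the low byte of $src = 0xAB:
//   movdl       dst,src    -> AB 00 00 00 ...           (dword 0)
//   punpcklbw   dst,dst    -> AB AB 00 00 ...           (byte pair)
//   pshuflw     dst,dst,0  -> AB AB AB AB AB AB AB AB   (low qword)
//   punpcklqdq  dst,dst    -> low qword copied to high  (full 16 bytes)
// vinserti128_high and vinserti64x4 then duplicate the 128-bit pattern into
// the upper YMM/ZMM lanes.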
instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm(legVecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
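// replicate8_imm(con, width), defined in this file's source block, builds
// the 64-bit constant-table entry for the _imm forms by tiling the low
// `width` bytes of con across a jlong; roughly:
//   replicate8_imm(0x41, 1)   == 0x4141414141414141
//   replicate8_imm(0x1234, 2) == 0x1234123412341234
// so a single movq plus a qword shuffle reproduces the scalar in every lane.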
instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
replicate32S" %} 3381 ins_encode %{ 3382 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3383 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3384 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3385 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3386 %} 3387 ins_pipe( pipe_slow ); 3388 %} 3389 3390 instruct Repl32S_imm(legVecZ dst, immI con) %{ 3391 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3392 match(Set dst (ReplicateS con)); 3393 format %{ "movq $dst,[$constantaddress]\n\t" 3394 "punpcklqdq $dst,$dst\n\t" 3395 "vinserti128_high $dst,$dst\t" 3396 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} 3397 ins_encode %{ 3398 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3399 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3400 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3401 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3402 %} 3403 ins_pipe( pipe_slow ); 3404 %} 3405 3406 instruct Repl4I(vecX dst, rRegI src) %{ 3407 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3408 match(Set dst (ReplicateI src)); 3409 format %{ "movd $dst,$src\n\t" 3410 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3411 ins_encode %{ 3412 __ movdl($dst$$XMMRegister, $src$$Register); 3413 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3414 %} 3415 ins_pipe( pipe_slow ); 3416 %} 3417 3418 instruct Repl4I_mem(vecX dst, memory mem) %{ 3419 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3420 match(Set dst (ReplicateI (LoadI mem))); 3421 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3422 ins_encode %{ 3423 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3424 %} 3425 ins_pipe( pipe_slow ); 3426 %} 3427 3428 instruct Repl8I(vecY dst, rRegI src) %{ 3429 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3430 match(Set dst (ReplicateI src)); 3431 format %{ "movd $dst,$src\n\t" 3432 "pshufd $dst,$dst,0x00\n\t" 3433 "vinserti128_high $dst,$dst\t! replicate8I" %} 3434 ins_encode %{ 3435 __ movdl($dst$$XMMRegister, $src$$Register); 3436 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3437 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3438 %} 3439 ins_pipe( pipe_slow ); 3440 %} 3441 3442 instruct Repl8I_mem(vecY dst, memory mem) %{ 3443 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3444 match(Set dst (ReplicateI (LoadI mem))); 3445 format %{ "pshufd $dst,$mem,0x00\n\t" 3446 "vinserti128_high $dst,$dst\t! replicate8I" %} 3447 ins_encode %{ 3448 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3449 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3450 %} 3451 ins_pipe( pipe_slow ); 3452 %} 3453 3454 instruct Repl16I(legVecZ dst, rRegI src) %{ 3455 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3456 match(Set dst (ReplicateI src)); 3457 format %{ "movd $dst,$src\n\t" 3458 "pshufd $dst,$dst,0x00\n\t" 3459 "vinserti128_high $dst,$dst\t" 3460 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
instruct Repl16I(legVecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm(legVecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
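// Note how the predicates pair up: every legacy replicate above requires the
// absence of AVX-512VL (plus BW for byte/short elements), while the EVEX
// REPLICATE section further down supplies vpbroadcast-based forms for the
// same vector lengths when those features are present, so exactly one rule
// can match a given Replicate node on any CPU.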
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L(legVecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
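// In the 32-bit (#else) variants above, a Java long lives in a register
// pair, so there is no single movdq from a GPR: the two 32-bit halves are
// moved separately (HIGH_FROM_LOW names the high half of the pair) and glued
// into one 64-bit lane with punpckldq before the usual qword broadcast.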
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm(legVecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F(legVecZ dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
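// The double replicates use pshufd with immediate 0x44 = 0b01'00'01'00,
// which selects source dwords {0,1,0,1}; that duplicates the low 64-bit
// lane, i.e. broadcasts one double across the 128-bit register before the
// insertf128/insert64x4 steps widen it.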
instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D(legVecZ dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor (AVX2) zeroes all lanes in one instruction.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
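// pxor/vpxor of a register with itself is the canonical x86 zeroing idiom:
// it needs no constant-table load, breaks the dependency on the register's
// previous value, and modern cores typically resolve it at register rename.
// That is why the _zero replicate forms avoid memory entirely.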
// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor (AVX2) zeroes all lanes in one instruction.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}
// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor (AVX2) zeroes all lanes in one instruction.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor (AVX2) zeroes all lanes in one instruction.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================

instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
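// Unlike the legacy shuffle sequences earlier, AVX-512 provides broadcast
// forms with a general-register source (the evpbroadcast* calls), so a
// scalar reaches every lane in one instruction without a movd-plus-shuffle
// chain; the _mem forms broadcast straight from memory with vpbroadcast* at
// the chosen EVEX length.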
replicate32B" %} 4251 ins_encode %{ 4252 int vector_len = 1; 4253 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4254 %} 4255 ins_pipe( pipe_slow ); 4256 %} 4257 4258 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4259 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4260 match(Set dst (ReplicateB (LoadB mem))); 4261 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4262 ins_encode %{ 4263 int vector_len = 1; 4264 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4265 %} 4266 ins_pipe( pipe_slow ); 4267 %} 4268 4269 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4270 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4271 match(Set dst (ReplicateB src)); 4272 format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %} 4273 ins_encode %{ 4274 int vector_len = 2; 4275 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4276 %} 4277 ins_pipe( pipe_slow ); 4278 %} 4279 4280 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4281 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4282 match(Set dst (ReplicateB (LoadB mem))); 4283 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4284 ins_encode %{ 4285 int vector_len = 2; 4286 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4287 %} 4288 ins_pipe( pipe_slow ); 4289 %} 4290 4291 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4292 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4293 match(Set dst (ReplicateB con)); 4294 format %{ "movq $dst,[$constantaddress]\n\t" 4295 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4296 ins_encode %{ 4297 int vector_len = 0; 4298 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4299 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4300 %} 4301 ins_pipe( pipe_slow ); 4302 %} 4303 4304 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4305 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4306 match(Set dst (ReplicateB con)); 4307 format %{ "movq $dst,[$constantaddress]\n\t" 4308 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4309 ins_encode %{ 4310 int vector_len = 1; 4311 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4312 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4313 %} 4314 ins_pipe( pipe_slow ); 4315 %} 4316 4317 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4318 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4319 match(Set dst (ReplicateB con)); 4320 format %{ "movq $dst,[$constantaddress]\n\t" 4321 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4322 ins_encode %{ 4323 int vector_len = 2; 4324 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4325 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4326 %} 4327 ins_pipe( pipe_slow ); 4328 %} 4329 4330 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4331 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4332 match(Set dst (ReplicateB zero)); 4333 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4334 ins_encode %{ 4335 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 
instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw  $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw  $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw  $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw  $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd  $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
replicate8I" %} 4507 ins_encode %{ 4508 int vector_len = 1; 4509 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4510 %} 4511 ins_pipe( pipe_slow ); 4512 %} 4513 4514 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4515 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4516 match(Set dst (ReplicateI (LoadI mem))); 4517 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4518 ins_encode %{ 4519 int vector_len = 1; 4520 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4521 %} 4522 ins_pipe( pipe_slow ); 4523 %} 4524 4525 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4526 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4527 match(Set dst (ReplicateI src)); 4528 format %{ "evpbroadcastd $dst,$src\t! replicate16I" %} 4529 ins_encode %{ 4530 int vector_len = 2; 4531 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4532 %} 4533 ins_pipe( pipe_slow ); 4534 %} 4535 4536 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4537 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4538 match(Set dst (ReplicateI (LoadI mem))); 4539 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4540 ins_encode %{ 4541 int vector_len = 2; 4542 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4543 %} 4544 ins_pipe( pipe_slow ); 4545 %} 4546 4547 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4548 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4549 match(Set dst (ReplicateI con)); 4550 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4551 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4552 ins_encode %{ 4553 int vector_len = 0; 4554 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4555 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4556 %} 4557 ins_pipe( pipe_slow ); 4558 %} 4559 4560 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4561 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4562 match(Set dst (ReplicateI con)); 4563 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4564 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4565 ins_encode %{ 4566 int vector_len = 1; 4567 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4568 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4569 %} 4570 ins_pipe( pipe_slow ); 4571 %} 4572 4573 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4574 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4575 match(Set dst (ReplicateI con)); 4576 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4577 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4578 ins_encode %{ 4579 int vector_len = 2; 4580 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4581 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4582 %} 4583 ins_pipe( pipe_slow ); 4584 %} 4585 4586 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4587 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4588 match(Set dst (ReplicateI zero)); 4589 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4590 ins_encode %{ 4591 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 
instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "evpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "evpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl   $dst,$src.lo\n\t"
            "movdl   $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl   $dst,$src.lo\n\t"
            "movdl   $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq    $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
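// On 32-bit VMs (the #else branch above) a long occupies a GPR pair, so the
// splat first assembles the 64-bit value in an XMM register: movdl inserts the
// low half, a second movdl plus punpckldq appends the high half, and a single
// vpbroadcastq then spreads the packed lane. In C++ intrinsics, roughly
// (illustrative sketch only, not part of this file):
//
//   #include <immintrin.h>
//   __m256i repl4L_from_pair(int lo, int hi) {
//     __m128i l = _mm_cvtsi32_si128(lo);        // movdl   $dst,$src.lo
//     __m128i h = _mm_cvtsi32_si128(hi);        // movdl   $tmp,$src.hi
//     __m128i q = _mm_unpacklo_epi32(l, h);     // punpckldq $dst,$tmp
//     return _mm256_broadcastq_epi64(q);        // vpbroadcastq $dst,$dst
//   }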
replicate8L" %} 4677 ins_encode %{ 4678 int vector_len = 2; 4679 __ movq($dst$$XMMRegister, $constantaddress($con)); 4680 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4681 %} 4682 ins_pipe( pipe_slow ); 4683 %} 4684 4685 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4686 predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4687 match(Set dst (ReplicateL (LoadL mem))); 4688 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4689 ins_encode %{ 4690 int vector_len = 0; 4691 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4692 %} 4693 ins_pipe( pipe_slow ); 4694 %} 4695 4696 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4697 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4698 match(Set dst (ReplicateL (LoadL mem))); 4699 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4700 ins_encode %{ 4701 int vector_len = 1; 4702 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4703 %} 4704 ins_pipe( pipe_slow ); 4705 %} 4706 4707 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4708 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4709 match(Set dst (ReplicateL (LoadL mem))); 4710 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4711 ins_encode %{ 4712 int vector_len = 2; 4713 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4714 %} 4715 ins_pipe( pipe_slow ); 4716 %} 4717 4718 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4719 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4720 match(Set dst (ReplicateL zero)); 4721 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4722 ins_encode %{ 4723 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4724 int vector_len = 2; 4725 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4726 %} 4727 ins_pipe( fpu_reg_reg ); 4728 %} 4729 4730 instruct Repl8F_evex(vecY dst, regF src) %{ 4731 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4732 match(Set dst (ReplicateF src)); 4733 format %{ "vpbroadcastss $dst,$src\t! replicate8F" %} 4734 ins_encode %{ 4735 int vector_len = 1; 4736 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4737 %} 4738 ins_pipe( pipe_slow ); 4739 %} 4740 4741 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4742 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4743 match(Set dst (ReplicateF (LoadF mem))); 4744 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4745 ins_encode %{ 4746 int vector_len = 1; 4747 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4748 %} 4749 ins_pipe( pipe_slow ); 4750 %} 4751 4752 instruct Repl16F_evex(vecZ dst, regF src) %{ 4753 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4754 match(Set dst (ReplicateF src)); 4755 format %{ "vpbroadcastss $dst,$src\t! replicate16F" %} 4756 ins_encode %{ 4757 int vector_len = 2; 4758 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4759 %} 4760 ins_pipe( pipe_slow ); 4761 %} 4762 4763 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4764 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4765 match(Set dst (ReplicateF (LoadF mem))); 4766 format %{ "vbroadcastss $dst,$mem\t! 
replicate16F" %} 4767 ins_encode %{ 4768 int vector_len = 2; 4769 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4770 %} 4771 ins_pipe( pipe_slow ); 4772 %} 4773 4774 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4775 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4776 match(Set dst (ReplicateF zero)); 4777 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4778 ins_encode %{ 4779 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4780 int vector_len = 2; 4781 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4782 %} 4783 ins_pipe( fpu_reg_reg ); 4784 %} 4785 4786 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4787 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4788 match(Set dst (ReplicateF zero)); 4789 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4790 ins_encode %{ 4791 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4792 int vector_len = 2; 4793 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4794 %} 4795 ins_pipe( fpu_reg_reg ); 4796 %} 4797 4798 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4799 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4800 match(Set dst (ReplicateF zero)); 4801 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4802 ins_encode %{ 4803 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4804 int vector_len = 2; 4805 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4806 %} 4807 ins_pipe( fpu_reg_reg ); 4808 %} 4809 4810 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4811 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4812 match(Set dst (ReplicateF zero)); 4813 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4814 ins_encode %{ 4815 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4816 int vector_len = 2; 4817 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4818 %} 4819 ins_pipe( fpu_reg_reg ); 4820 %} 4821 4822 instruct Repl4D_evex(vecY dst, regD src) %{ 4823 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4824 match(Set dst (ReplicateD src)); 4825 format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %} 4826 ins_encode %{ 4827 int vector_len = 1; 4828 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4829 %} 4830 ins_pipe( pipe_slow ); 4831 %} 4832 4833 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4834 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4835 match(Set dst (ReplicateD (LoadD mem))); 4836 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4837 ins_encode %{ 4838 int vector_len = 1; 4839 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4840 %} 4841 ins_pipe( pipe_slow ); 4842 %} 4843 4844 instruct Repl8D_evex(vecZ dst, regD src) %{ 4845 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4846 match(Set dst (ReplicateD src)); 4847 format %{ "vpbroadcastsd $dst,$src\t! 
replicate8D" %} 4848 ins_encode %{ 4849 int vector_len = 2; 4850 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4851 %} 4852 ins_pipe( pipe_slow ); 4853 %} 4854 4855 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4856 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4857 match(Set dst (ReplicateD (LoadD mem))); 4858 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4859 ins_encode %{ 4860 int vector_len = 2; 4861 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4862 %} 4863 ins_pipe( pipe_slow ); 4864 %} 4865 4866 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4867 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4868 match(Set dst (ReplicateD zero)); 4869 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4870 ins_encode %{ 4871 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4872 int vector_len = 2; 4873 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4874 %} 4875 ins_pipe( fpu_reg_reg ); 4876 %} 4877 4878 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4879 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4880 match(Set dst (ReplicateD zero)); 4881 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4882 ins_encode %{ 4883 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4884 int vector_len = 2; 4885 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4886 %} 4887 ins_pipe( fpu_reg_reg ); 4888 %} 4889 4890 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4891 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4892 match(Set dst (ReplicateD zero)); 4893 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4894 ins_encode %{ 4895 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4896 int vector_len = 2; 4897 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4898 %} 4899 ins_pipe( fpu_reg_reg ); 4900 %} 4901 4902 // ====================REDUCTION ARITHMETIC======================================= 4903 4904 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4905 predicate(UseSSE > 2 && UseAVX == 0); 4906 match(Set dst (AddReductionVI src1 src2)); 4907 effect(TEMP tmp2, TEMP tmp); 4908 format %{ "movdqu $tmp2,$src2\n\t" 4909 "phaddd $tmp2,$tmp2\n\t" 4910 "movd $tmp,$src1\n\t" 4911 "paddd $tmp,$tmp2\n\t" 4912 "movd $dst,$tmp\t! add reduction2I" %} 4913 ins_encode %{ 4914 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4915 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4916 __ movdl($tmp$$XMMRegister, $src1$$Register); 4917 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4918 __ movdl($dst$$Register, $tmp$$XMMRegister); 4919 %} 4920 ins_pipe( pipe_slow ); 4921 %} 4922 4923 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4924 predicate(VM_Version::supports_avxonly()); 4925 match(Set dst (AddReductionVI src1 src2)); 4926 effect(TEMP tmp, TEMP tmp2); 4927 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4928 "movd $tmp2,$src1\n\t" 4929 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4930 "movd $dst,$tmp2\t! 
add reduction2I" %} 4931 ins_encode %{ 4932 int vector_len = 0; 4933 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4934 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4935 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4936 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4937 %} 4938 ins_pipe( pipe_slow ); 4939 %} 4940 4941 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4942 predicate(UseAVX > 2); 4943 match(Set dst (AddReductionVI src1 src2)); 4944 effect(TEMP tmp, TEMP tmp2); 4945 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4946 "vpaddd $tmp,$src2,$tmp2\n\t" 4947 "movd $tmp2,$src1\n\t" 4948 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4949 "movd $dst,$tmp2\t! add reduction2I" %} 4950 ins_encode %{ 4951 int vector_len = 0; 4952 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4953 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4954 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4955 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4956 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4957 %} 4958 ins_pipe( pipe_slow ); 4959 %} 4960 4961 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 4962 predicate(UseSSE > 2 && UseAVX == 0); 4963 match(Set dst (AddReductionVI src1 src2)); 4964 effect(TEMP tmp, TEMP tmp2); 4965 format %{ "movdqu $tmp,$src2\n\t" 4966 "phaddd $tmp,$tmp\n\t" 4967 "phaddd $tmp,$tmp\n\t" 4968 "movd $tmp2,$src1\n\t" 4969 "paddd $tmp2,$tmp\n\t" 4970 "movd $dst,$tmp2\t! add reduction4I" %} 4971 ins_encode %{ 4972 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4973 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4974 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4975 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4976 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4977 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4978 %} 4979 ins_pipe( pipe_slow ); 4980 %} 4981 4982 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 4983 predicate(VM_Version::supports_avxonly()); 4984 match(Set dst (AddReductionVI src1 src2)); 4985 effect(TEMP tmp, TEMP tmp2); 4986 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4987 "vphaddd $tmp,$tmp,$tmp\n\t" 4988 "movd $tmp2,$src1\n\t" 4989 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4990 "movd $dst,$tmp2\t! add reduction4I" %} 4991 ins_encode %{ 4992 int vector_len = 0; 4993 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4994 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4995 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4996 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4997 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4998 %} 4999 ins_pipe( pipe_slow ); 5000 %} 5001 5002 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5003 predicate(UseAVX > 2); 5004 match(Set dst (AddReductionVI src1 src2)); 5005 effect(TEMP tmp, TEMP tmp2); 5006 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5007 "vpaddd $tmp,$src2,$tmp2\n\t" 5008 "pshufd $tmp2,$tmp,0x1\n\t" 5009 "vpaddd $tmp,$tmp,$tmp2\n\t" 5010 "movd $tmp2,$src1\n\t" 5011 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5012 "movd $dst,$tmp2\t! 
add reduction4I" %} 5013 ins_encode %{ 5014 int vector_len = 0; 5015 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5016 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5017 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5018 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5019 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5020 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5021 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5022 %} 5023 ins_pipe( pipe_slow ); 5024 %} 5025 5026 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5027 predicate(VM_Version::supports_avxonly()); 5028 match(Set dst (AddReductionVI src1 src2)); 5029 effect(TEMP tmp, TEMP tmp2); 5030 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5031 "vphaddd $tmp,$tmp,$tmp2\n\t" 5032 "vextracti128_high $tmp2,$tmp\n\t" 5033 "vpaddd $tmp,$tmp,$tmp2\n\t" 5034 "movd $tmp2,$src1\n\t" 5035 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5036 "movd $dst,$tmp2\t! add reduction8I" %} 5037 ins_encode %{ 5038 int vector_len = 1; 5039 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5040 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5041 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 5042 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5043 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5044 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5045 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5046 %} 5047 ins_pipe( pipe_slow ); 5048 %} 5049 5050 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5051 predicate(UseAVX > 2); 5052 match(Set dst (AddReductionVI src1 src2)); 5053 effect(TEMP tmp, TEMP tmp2); 5054 format %{ "vextracti128_high $tmp,$src2\n\t" 5055 "vpaddd $tmp,$tmp,$src2\n\t" 5056 "pshufd $tmp2,$tmp,0xE\n\t" 5057 "vpaddd $tmp,$tmp,$tmp2\n\t" 5058 "pshufd $tmp2,$tmp,0x1\n\t" 5059 "vpaddd $tmp,$tmp,$tmp2\n\t" 5060 "movd $tmp2,$src1\n\t" 5061 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5062 "movd $dst,$tmp2\t! 
add reduction8I" %} 5063 ins_encode %{ 5064 int vector_len = 0; 5065 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5066 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5067 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5068 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5069 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5070 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5071 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5072 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5073 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5074 %} 5075 ins_pipe( pipe_slow ); 5076 %} 5077 5078 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5079 predicate(UseAVX > 2); 5080 match(Set dst (AddReductionVI src1 src2)); 5081 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5082 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5083 "vpaddd $tmp3,$tmp3,$src2\n\t" 5084 "vextracti128_high $tmp,$tmp3\n\t" 5085 "vpaddd $tmp,$tmp,$tmp3\n\t" 5086 "pshufd $tmp2,$tmp,0xE\n\t" 5087 "vpaddd $tmp,$tmp,$tmp2\n\t" 5088 "pshufd $tmp2,$tmp,0x1\n\t" 5089 "vpaddd $tmp,$tmp,$tmp2\n\t" 5090 "movd $tmp2,$src1\n\t" 5091 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5092 "movd $dst,$tmp2\t! mul reduction16I" %} 5093 ins_encode %{ 5094 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5095 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5096 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5097 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5098 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5099 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5100 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5101 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5102 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5103 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5104 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5105 %} 5106 ins_pipe( pipe_slow ); 5107 %} 5108 5109 #ifdef _LP64 5110 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5111 predicate(UseAVX > 2); 5112 match(Set dst (AddReductionVL src1 src2)); 5113 effect(TEMP tmp, TEMP tmp2); 5114 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5115 "vpaddq $tmp,$src2,$tmp2\n\t" 5116 "movdq $tmp2,$src1\n\t" 5117 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5118 "movdq $dst,$tmp2\t! add reduction2L" %} 5119 ins_encode %{ 5120 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5121 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5122 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5123 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5124 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5125 %} 5126 ins_pipe( pipe_slow ); 5127 %} 5128 5129 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5130 predicate(UseAVX > 2); 5131 match(Set dst (AddReductionVL src1 src2)); 5132 effect(TEMP tmp, TEMP tmp2); 5133 format %{ "vextracti128_high $tmp,$src2\n\t" 5134 "vpaddq $tmp2,$tmp,$src2\n\t" 5135 "pshufd $tmp,$tmp2,0xE\n\t" 5136 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5137 "movdq $tmp,$src1\n\t" 5138 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5139 "movdq $dst,$tmp2\t! 
add reduction4L" %} 5140 ins_encode %{ 5141 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5142 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5143 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5144 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5145 __ movdq($tmp$$XMMRegister, $src1$$Register); 5146 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5147 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5148 %} 5149 ins_pipe( pipe_slow ); 5150 %} 5151 5152 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5153 predicate(UseAVX > 2); 5154 match(Set dst (AddReductionVL src1 src2)); 5155 effect(TEMP tmp, TEMP tmp2); 5156 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5157 "vpaddq $tmp2,$tmp2,$src2\n\t" 5158 "vextracti128_high $tmp,$tmp2\n\t" 5159 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5160 "pshufd $tmp,$tmp2,0xE\n\t" 5161 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5162 "movdq $tmp,$src1\n\t" 5163 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5164 "movdq $dst,$tmp2\t! add reduction8L" %} 5165 ins_encode %{ 5166 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5167 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5168 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5169 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5170 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5171 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5172 __ movdq($tmp$$XMMRegister, $src1$$Register); 5173 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5174 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5175 %} 5176 ins_pipe( pipe_slow ); 5177 %} 5178 #endif 5179 5180 instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5181 predicate(UseSSE >= 1 && UseAVX == 0); 5182 match(Set dst (AddReductionVF dst src2)); 5183 effect(TEMP dst, TEMP tmp); 5184 format %{ "addss $dst,$src2\n\t" 5185 "pshufd $tmp,$src2,0x01\n\t" 5186 "addss $dst,$tmp\t! add reduction2F" %} 5187 ins_encode %{ 5188 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5189 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5190 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5191 %} 5192 ins_pipe( pipe_slow ); 5193 %} 5194 5195 instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5196 predicate(UseAVX > 0); 5197 match(Set dst (AddReductionVF dst src2)); 5198 effect(TEMP dst, TEMP tmp); 5199 format %{ "vaddss $dst,$dst,$src2\n\t" 5200 "pshufd $tmp,$src2,0x01\n\t" 5201 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5202 ins_encode %{ 5203 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5204 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5205 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5206 %} 5207 ins_pipe( pipe_slow ); 5208 %} 5209 5210 instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5211 predicate(UseSSE >= 1 && UseAVX == 0); 5212 match(Set dst (AddReductionVF dst src2)); 5213 effect(TEMP dst, TEMP tmp); 5214 format %{ "addss $dst,$src2\n\t" 5215 "pshufd $tmp,$src2,0x01\n\t" 5216 "addss $dst,$tmp\n\t" 5217 "pshufd $tmp,$src2,0x02\n\t" 5218 "addss $dst,$tmp\n\t" 5219 "pshufd $tmp,$src2,0x03\n\t" 5220 "addss $dst,$tmp\t! 
add reduction4F" %} 5221 ins_encode %{ 5222 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5223 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5224 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5225 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5226 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5227 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5228 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5229 %} 5230 ins_pipe( pipe_slow ); 5231 %} 5232 5233 instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5234 predicate(UseAVX > 0); 5235 match(Set dst (AddReductionVF dst src2)); 5236 effect(TEMP tmp, TEMP dst); 5237 format %{ "vaddss $dst,dst,$src2\n\t" 5238 "pshufd $tmp,$src2,0x01\n\t" 5239 "vaddss $dst,$dst,$tmp\n\t" 5240 "pshufd $tmp,$src2,0x02\n\t" 5241 "vaddss $dst,$dst,$tmp\n\t" 5242 "pshufd $tmp,$src2,0x03\n\t" 5243 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5244 ins_encode %{ 5245 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5246 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5247 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5248 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5249 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5250 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5251 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5252 %} 5253 ins_pipe( pipe_slow ); 5254 %} 5255 5256 instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{ 5257 predicate(UseAVX > 0); 5258 match(Set dst (AddReductionVF dst src2)); 5259 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5260 format %{ "vaddss $dst,$dst,$src2\n\t" 5261 "pshufd $tmp,$src2,0x01\n\t" 5262 "vaddss $dst,$dst,$tmp\n\t" 5263 "pshufd $tmp,$src2,0x02\n\t" 5264 "vaddss $dst,$dst,$tmp\n\t" 5265 "pshufd $tmp,$src2,0x03\n\t" 5266 "vaddss $dst,$dst,$tmp\n\t" 5267 "vextractf128_high $tmp2,$src2\n\t" 5268 "vaddss $dst,$dst,$tmp2\n\t" 5269 "pshufd $tmp,$tmp2,0x01\n\t" 5270 "vaddss $dst,$dst,$tmp\n\t" 5271 "pshufd $tmp,$tmp2,0x02\n\t" 5272 "vaddss $dst,$dst,$tmp\n\t" 5273 "pshufd $tmp,$tmp2,0x03\n\t" 5274 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5275 ins_encode %{ 5276 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5277 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5278 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5279 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5280 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5281 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5282 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5283 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5284 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5285 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5286 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5287 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5288 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5289 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5290 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5291 %} 5292 ins_pipe( pipe_slow ); 5293 %} 5294 5295 instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5296 predicate(UseAVX > 2); 5297 match(Set dst (AddReductionVF dst src2)); 5298 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5299 format %{ "vaddss $dst,$dst,$src2\n\t" 5300 "pshufd $tmp,$src2,0x01\n\t" 5301 "vaddss $dst,$dst,$tmp\n\t" 5302 "pshufd $tmp,$src2,0x02\n\t" 5303 "vaddss $dst,$dst,$tmp\n\t" 5304 "pshufd $tmp,$src2,0x03\n\t" 5305 "vaddss $dst,$dst,$tmp\n\t" 5306 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5307 "vaddss $dst,$dst,$tmp2\n\t" 5308 "pshufd $tmp,$tmp2,0x01\n\t" 5309 "vaddss $dst,$dst,$tmp\n\t" 5310 "pshufd $tmp,$tmp2,0x02\n\t" 5311 "vaddss $dst,$dst,$tmp\n\t" 5312 "pshufd $tmp,$tmp2,0x03\n\t" 5313 "vaddss $dst,$dst,$tmp\n\t" 5314 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5315 "vaddss $dst,$dst,$tmp2\n\t" 5316 "pshufd $tmp,$tmp2,0x01\n\t" 5317 "vaddss $dst,$dst,$tmp\n\t" 5318 "pshufd $tmp,$tmp2,0x02\n\t" 5319 "vaddss $dst,$dst,$tmp\n\t" 5320 "pshufd $tmp,$tmp2,0x03\n\t" 5321 "vaddss $dst,$dst,$tmp\n\t" 5322 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5323 "vaddss $dst,$dst,$tmp2\n\t" 5324 "pshufd $tmp,$tmp2,0x01\n\t" 5325 "vaddss $dst,$dst,$tmp\n\t" 5326 "pshufd $tmp,$tmp2,0x02\n\t" 5327 "vaddss $dst,$dst,$tmp\n\t" 5328 "pshufd $tmp,$tmp2,0x03\n\t" 5329 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5330 ins_encode %{ 5331 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5332 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5333 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5334 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5335 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5336 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5337 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5338 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5339 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5340 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5341 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5342 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5343 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5344 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5345 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5346 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5347 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5348 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5349 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5350 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5351 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5352 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5353 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5354 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5355 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5356 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5357 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5358 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5359 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5360 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5361 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5362 %} 5363 ins_pipe( pipe_slow ); 5364 %} 5365 5366 instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5367 predicate(UseSSE >= 1 && UseAVX == 0); 5368 match(Set dst (AddReductionVD dst src2)); 5369 effect(TEMP tmp, TEMP dst); 5370 format %{ "addsd $dst,$src2\n\t" 5371 "pshufd $tmp,$src2,0xE\n\t" 5372 "addsd $dst,$tmp\t! add reduction2D" %} 5373 ins_encode %{ 5374 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5375 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5376 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5377 %} 5378 ins_pipe( pipe_slow ); 5379 %} 5380 5381 instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5382 predicate(UseAVX > 0); 5383 match(Set dst (AddReductionVD dst src2)); 5384 effect(TEMP tmp, TEMP dst); 5385 format %{ "vaddsd $dst,$dst,$src2\n\t" 5386 "pshufd $tmp,$src2,0xE\n\t" 5387 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5388 ins_encode %{ 5389 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5390 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5391 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5392 %} 5393 ins_pipe( pipe_slow ); 5394 %} 5395 5396 instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{ 5397 predicate(UseAVX > 0); 5398 match(Set dst (AddReductionVD dst src2)); 5399 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5400 format %{ "vaddsd $dst,$dst,$src2\n\t" 5401 "pshufd $tmp,$src2,0xE\n\t" 5402 "vaddsd $dst,$dst,$tmp\n\t" 5403 "vextractf128 $tmp2,$src2,0x1\n\t" 5404 "vaddsd $dst,$dst,$tmp2\n\t" 5405 "pshufd $tmp,$tmp2,0xE\n\t" 5406 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5407 ins_encode %{ 5408 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5409 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5410 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5411 __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5412 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5413 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5414 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5415 %} 5416 ins_pipe( pipe_slow ); 5417 %} 5418 5419 instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5420 predicate(UseAVX > 2); 5421 match(Set dst (AddReductionVD dst src2)); 5422 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5423 format %{ "vaddsd $dst,$dst,$src2\n\t" 5424 "pshufd $tmp,$src2,0xE\n\t" 5425 "vaddsd $dst,$dst,$tmp\n\t" 5426 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5427 "vaddsd $dst,$dst,$tmp2\n\t" 5428 "pshufd $tmp,$tmp2,0xE\n\t" 5429 "vaddsd $dst,$dst,$tmp\n\t" 5430 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5431 "vaddsd $dst,$dst,$tmp2\n\t" 5432 "pshufd $tmp,$tmp2,0xE\n\t" 5433 "vaddsd $dst,$dst,$tmp\n\t" 5434 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5435 "vaddsd $dst,$dst,$tmp2\n\t" 5436 "pshufd $tmp,$tmp2,0xE\n\t" 5437 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5438 ins_encode %{ 5439 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5440 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5441 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5442 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5443 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5444 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5445 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5446 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5447 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5448 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5449 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5450 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5451 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5452 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5453 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5454 %} 5455 ins_pipe( pipe_slow ); 5456 %} 5457 5458 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5459 predicate(UseSSE > 3 && UseAVX == 0); 5460 match(Set dst (MulReductionVI src1 src2)); 5461 effect(TEMP tmp, TEMP tmp2); 5462 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5463 "pmulld $tmp2,$src2\n\t" 5464 "movd $tmp,$src1\n\t" 5465 "pmulld $tmp2,$tmp\n\t" 5466 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5467 ins_encode %{ 5468 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5469 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5470 __ movdl($tmp$$XMMRegister, $src1$$Register); 5471 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5472 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5473 %} 5474 ins_pipe( pipe_slow ); 5475 %} 5476 5477 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5478 predicate(UseAVX > 0); 5479 match(Set dst (MulReductionVI src1 src2)); 5480 effect(TEMP tmp, TEMP tmp2); 5481 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5482 "vpmulld $tmp,$src2,$tmp2\n\t" 5483 "movd $tmp2,$src1\n\t" 5484 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5485 "movd $dst,$tmp2\t! mul reduction2I" %} 5486 ins_encode %{ 5487 int vector_len = 0; 5488 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5489 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5490 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5491 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5492 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5493 %} 5494 ins_pipe( pipe_slow ); 5495 %} 5496 5497 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5498 predicate(UseSSE > 3 && UseAVX == 0); 5499 match(Set dst (MulReductionVI src1 src2)); 5500 effect(TEMP tmp, TEMP tmp2); 5501 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5502 "pmulld $tmp2,$src2\n\t" 5503 "pshufd $tmp,$tmp2,0x1\n\t" 5504 "pmulld $tmp2,$tmp\n\t" 5505 "movd $tmp,$src1\n\t" 5506 "pmulld $tmp2,$tmp\n\t" 5507 "movd $dst,$tmp2\t! mul reduction4I" %} 5508 ins_encode %{ 5509 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5510 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5511 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5512 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5513 __ movdl($tmp$$XMMRegister, $src1$$Register); 5514 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5515 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5516 %} 5517 ins_pipe( pipe_slow ); 5518 %} 5519 5520 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5521 predicate(UseAVX > 0); 5522 match(Set dst (MulReductionVI src1 src2)); 5523 effect(TEMP tmp, TEMP tmp2); 5524 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5525 "vpmulld $tmp,$src2,$tmp2\n\t" 5526 "pshufd $tmp2,$tmp,0x1\n\t" 5527 "vpmulld $tmp,$tmp,$tmp2\n\t" 5528 "movd $tmp2,$src1\n\t" 5529 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5530 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5531 ins_encode %{ 5532 int vector_len = 0; 5533 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5534 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5535 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5536 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5537 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5538 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5539 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5540 %} 5541 ins_pipe( pipe_slow ); 5542 %} 5543 5544 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5545 predicate(UseAVX > 1); 5546 match(Set dst (MulReductionVI src1 src2)); 5547 effect(TEMP tmp, TEMP tmp2); 5548 format %{ "vextracti128_high $tmp,$src2\n\t" 5549 "vpmulld $tmp,$tmp,$src2\n\t" 5550 "pshufd $tmp2,$tmp,0xE\n\t" 5551 "vpmulld $tmp,$tmp,$tmp2\n\t" 5552 "pshufd $tmp2,$tmp,0x1\n\t" 5553 "vpmulld $tmp,$tmp,$tmp2\n\t" 5554 "movd $tmp2,$src1\n\t" 5555 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5556 "movd $dst,$tmp2\t! mul reduction8I" %} 5557 ins_encode %{ 5558 int vector_len = 0; 5559 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5560 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5561 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5562 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5563 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5564 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5565 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5566 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5567 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5568 %} 5569 ins_pipe( pipe_slow ); 5570 %} 5571 5572 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5573 predicate(UseAVX > 2); 5574 match(Set dst (MulReductionVI src1 src2)); 5575 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5576 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5577 "vpmulld $tmp3,$tmp3,$src2\n\t" 5578 "vextracti128_high $tmp,$tmp3\n\t" 5579 "vpmulld $tmp,$tmp,$src2\n\t" 5580 "pshufd $tmp2,$tmp,0xE\n\t" 5581 "vpmulld $tmp,$tmp,$tmp2\n\t" 5582 "pshufd $tmp2,$tmp,0x1\n\t" 5583 "vpmulld $tmp,$tmp,$tmp2\n\t" 5584 "movd $tmp2,$src1\n\t" 5585 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5586 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5587 ins_encode %{ 5588 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5589 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5590 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5591 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5592 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5593 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5594 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5595 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5596 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5597 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5598 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5599 %} 5600 ins_pipe( pipe_slow ); 5601 %} 5602 5603 #ifdef _LP64 5604 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5605 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5606 match(Set dst (MulReductionVL src1 src2)); 5607 effect(TEMP tmp, TEMP tmp2); 5608 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5609 "vpmullq $tmp,$src2,$tmp2\n\t" 5610 "movdq $tmp2,$src1\n\t" 5611 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5612 "movdq $dst,$tmp2\t! mul reduction2L" %} 5613 ins_encode %{ 5614 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5615 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5616 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5617 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5618 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5619 %} 5620 ins_pipe( pipe_slow ); 5621 %} 5622 5623 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5624 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5625 match(Set dst (MulReductionVL src1 src2)); 5626 effect(TEMP tmp, TEMP tmp2); 5627 format %{ "vextracti128_high $tmp,$src2\n\t" 5628 "vpmullq $tmp2,$tmp,$src2\n\t" 5629 "pshufd $tmp,$tmp2,0xE\n\t" 5630 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5631 "movdq $tmp,$src1\n\t" 5632 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5633 "movdq $dst,$tmp2\t! mul reduction4L" %} 5634 ins_encode %{ 5635 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5636 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5637 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5638 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5639 __ movdq($tmp$$XMMRegister, $src1$$Register); 5640 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5641 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5642 %} 5643 ins_pipe( pipe_slow ); 5644 %} 5645 5646 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5647 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5648 match(Set dst (MulReductionVL src1 src2)); 5649 effect(TEMP tmp, TEMP tmp2); 5650 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5651 "vpmullq $tmp2,$tmp2,$src2\n\t" 5652 "vextracti128_high $tmp,$tmp2\n\t" 5653 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5654 "pshufd $tmp,$tmp2,0xE\n\t" 5655 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5656 "movdq $tmp,$src1\n\t" 5657 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5658 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5659 ins_encode %{ 5660 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5661 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5662 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5663 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5664 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5665 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5666 __ movdq($tmp$$XMMRegister, $src1$$Register); 5667 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5668 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5669 %} 5670 ins_pipe( pipe_slow ); 5671 %} 5672 #endif 5673 5674 instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{ 5675 predicate(UseSSE >= 1 && UseAVX == 0); 5676 match(Set dst (MulReductionVF dst src2)); 5677 effect(TEMP dst, TEMP tmp); 5678 format %{ "mulss $dst,$src2\n\t" 5679 "pshufd $tmp,$src2,0x01\n\t" 5680 "mulss $dst,$tmp\t! mul reduction2F" %} 5681 ins_encode %{ 5682 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5683 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5684 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5685 %} 5686 ins_pipe( pipe_slow ); 5687 %} 5688 5689 instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5690 predicate(UseAVX > 0); 5691 match(Set dst (MulReductionVF dst src2)); 5692 effect(TEMP tmp, TEMP dst); 5693 format %{ "vmulss $dst,$dst,$src2\n\t" 5694 "pshufd $tmp,$src2,0x01\n\t" 5695 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5696 ins_encode %{ 5697 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5698 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5699 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5700 %} 5701 ins_pipe( pipe_slow ); 5702 %} 5703 5704 instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5705 predicate(UseSSE >= 1 && UseAVX == 0); 5706 match(Set dst (MulReductionVF dst src2)); 5707 effect(TEMP dst, TEMP tmp); 5708 format %{ "mulss $dst,$src2\n\t" 5709 "pshufd $tmp,$src2,0x01\n\t" 5710 "mulss $dst,$tmp\n\t" 5711 "pshufd $tmp,$src2,0x02\n\t" 5712 "mulss $dst,$tmp\n\t" 5713 "pshufd $tmp,$src2,0x03\n\t" 5714 "mulss $dst,$tmp\t! mul reduction4F" %} 5715 ins_encode %{ 5716 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5717 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5718 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5719 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5720 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5721 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5722 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5723 %} 5724 ins_pipe( pipe_slow ); 5725 %} 5726 5727 instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5728 predicate(UseAVX > 0); 5729 match(Set dst (MulReductionVF dst src2)); 5730 effect(TEMP tmp, TEMP dst); 5731 format %{ "vmulss $dst,$dst,$src2\n\t" 5732 "pshufd $tmp,$src2,0x01\n\t" 5733 "vmulss $dst,$dst,$tmp\n\t" 5734 "pshufd $tmp,$src2,0x02\n\t" 5735 "vmulss $dst,$dst,$tmp\n\t" 5736 "pshufd $tmp,$src2,0x03\n\t" 5737 "vmulss $dst,$dst,$tmp\t! 

instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
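
// Note: pshufd's immediate packs four 2-bit source-lane indices, one per
// destination dword, and only element 0 matters to these reductions:
// 0x01/0x02/0x03 copy lane 1/2/3 into element 0, while 0xE (lanes {2,3,0,0})
// moves the upper 64 bits down, which is how the double reductions below
// select the high element of each 128-bit pair.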

instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
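
// Note on conventions used by the arithmetic rules below: the vector_len
// argument passed to the macro assembler selects the encoded vector width
// (0 = 128-bit, 1 = 256-bit, 2 = 512-bit), and each AVX rule comes in a
// register form plus a _mem form whose match tree folds a vector load
// straight into the instruction's memory operand, e.g.
// (AddVB src (LoadVector mem)) rather than a separate load followed by a
// register-register add.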

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
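
// Note: the 512-bit byte (and short) forms above and below additionally
// require VM_Version::supports_avx512bw(), since vpaddb/vpaddw with 512-bit
// operands are AVX512BW instructions; the 512-bit dword/qword forms need
// only AVX512F and therefore check UseAVX > 2 alone.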

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Byte vector mul
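
// Note: x86 has no byte-wide SIMD multiply (there is no "pmullb"), so the
// byte rules below sign-extend each operand to 16-bit lanes (pmovsxbw),
// multiply with pmullw, mask the products back to their low bytes with the
// 0x00ff00ff... constant (vector_short_to_byte_mask), and re-pack with
// packuswb. A rough C sketch of the per-lane idea (illustrative only, not
// the generated code):
//
//   void mul_bytes(int8_t* dst, const int8_t* a, const int8_t* b, int n) {
//     for (int i = 0; i < n; i++) {
//       int16_t wide = (int16_t)a[i] * (int16_t)b[i];  // widening multiply
//       dst[i] = (int8_t)(wide & 0xff);                // keep the low byte
//     }
//   }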
// Byte vector mul
instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp, rRegI scratch) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $dst,$src2\n\t"
           "pmullw $tmp,$dst\n\t"
           "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $dst,$tmp\n\t"
           "packuswb $dst,$dst\t! mul packed4B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp, rRegI scratch) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $dst,$src2\n\t"
           "pmullw $tmp,$dst\n\t"
           "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $dst,$tmp\n\t"
           "packuswb $dst,$dst\t! mul packed8B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2, rRegI scratch) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"pmovsxbw $tmp1,$src1\n\t"
           "pmovsxbw $tmp2,$src2\n\t"
           "pmullw $tmp1,$tmp2\n\t"
           "pshufd $tmp2,$src1,0xEE\n\t"
           "pshufd $dst,$src2,0xEE\n\t"
           "pmovsxbw $tmp2,$tmp2\n\t"
           "pmovsxbw $dst,$dst\n\t"
           "pmullw $tmp2,$dst\n\t"
           "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp2,$dst\n\t"
           "pand $dst,$tmp1\n\t"
           "packuswb $dst,$tmp2\t! mul packed16B" %}
  ins_encode %{
    __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE);
    __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE);
    __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vpmovsxbw $tmp,$src1\n\t"
           "vpmovsxbw $dst,$src2\n\t"
           "vpmullw $tmp,$tmp,$dst\n\t"
           "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
           "vpand $dst,$dst,$tmp\n\t"
           "vextracti128_high $tmp,$dst\n\t"
           "vpackuswb $dst,$dst,$tmp\t! mul packed16B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vextracti128_high $tmp1,$src1\n\t"
           "vextracti128_high $dst,$src2\n\t"
           "vpmovsxbw $tmp1,$tmp1\n\t"
           "vpmovsxbw $dst,$dst\n\t"
           "vpmullw $tmp1,$tmp1,$dst\n\t"
           "vpmovsxbw $tmp2,$src1\n\t"
           "vpmovsxbw $dst,$src2\n\t"
           "vpmullw $tmp2,$tmp2,$dst\n\t"
           "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t"
           "vpbroadcastd $dst, $dst\n\t"
           "vpand $tmp1,$tmp1,$dst\n\t"
           "vpand $dst,$dst,$tmp2\n\t"
           "vpackuswb $dst,$dst,$tmp1\n\t"
           "vpermq $dst, $dst, 0xD8\t! mul packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
    __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
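// In the 256-bit rule above, vpackuswb packs within each 128-bit lane,
// so the two packed halves come out interleaved by quadword; the
// trailing vpermq with selector 0xD8 (qword order 0,2,1,3) restores
// element order. Roughly, for byte groups b0..b31 (illustrative only):
//
//   // after vpackuswb: b0-7, b16-23 | b8-15, b24-31   (per-lane packing)
//   // vpermq 0xD8 ->:  b0-7, b8-15  | b16-23, b24-31  (order restored)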
instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vextracti64x4_high $tmp1,$src1\n\t"
           "vextracti64x4_high $dst,$src2\n\t"
           "vpmovsxbw $tmp1,$tmp1\n\t"
           "vpmovsxbw $dst,$dst\n\t"
           "vpmullw $tmp1,$tmp1,$dst\n\t"
           "vpmovsxbw $tmp2,$src1\n\t"
           "vpmovsxbw $dst,$src2\n\t"
           "vpmullw $tmp2,$tmp2,$dst\n\t"
           "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t"
           "vpbroadcastd $dst, $dst\n\t"
           "vpand $tmp1,$tmp1,$dst\n\t"
           "vpand $tmp2,$tmp2,$dst\n\t"
           "vpackuswb $dst,$tmp1,$tmp2\n\t"
           "evmovdquq $tmp2,[0x0604020007050301]\n\t"
           "vpermq $dst,$tmp2,$dst\t! mul packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
    __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
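// pmullw/vpmullw keep only the low 16 bits of each product, which is
// exactly Java's wrap-around semantics for (short)(a * b), so the
// short/char rules below need no fixup code. Per lane, roughly
// (illustrative only):
//
//   short r = (short)(a * b);   // low 16 bits of the 32-bit product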
// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
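// Note: the 512-bit short forms above additionally require AVX512BW
// (VM_Version::supports_avx512bw()), because byte/word instructions on
// ZMM registers are not part of baseline AVX512F; hence the extra guard:
//
//   predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && ...);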
// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
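// A 64-bit element multiply (vpmullq) only exists with AVX512DQ, so
// every MulVL rule below is additionally guarded by
// VM_Version::supports_avx512dq(). Per lane it keeps the low 64 bits of
// the product, matching Java's long multiply (illustrative only):
//
//   long r = a * b;   // low 64 bits, wraps around on overflow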
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
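// The CMoveV rules below implement a vector conditional move in two
// steps: vcmpps/vcmppd writes an all-ones or all-zeros mask per lane,
// and vblendvps/vblendvpd then selects src2 where the mask is set and
// src1 elsewhere. Roughly, per lane (illustrative only):
//
//   dst[i] = cmp(src1[i], src2[i], cond) ? src2[i] : src1[i];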
instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
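// Vector division maps 1:1 onto divps/divpd and needs no fixup code:
// the SSE/AVX instructions follow IEEE-754, so x/0.0 yields +/-Inf and
// 0.0/0.0 yields NaN, which is exactly Java's float and double division
// semantics (illustrative only):
//
//   float r = a / b;   // IEEE-754: 1.0f/0.0f == Inf, 0.0f/0.0f == NaN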
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
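// The *_mem variants above, and throughout this file, fold the vector
// load into the instruction's memory operand; the matcher can select
// them when a LoadVector feeds a single node, saving a register and a
// separate load. Roughly (illustrative only):
//
//   __ vsqrtpd(dst, src_reg, vector_len);    // reg form: dst = sqrt(src)
//   __ vsqrtpd(dst, mem_addr, vector_len);   // mem form: load folded in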
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{
  match(Set dst cnt);
  effect(TEMP tmp);
  format %{ "movl $tmp,$cnt\t"
            "movdl $dst,$tmp\t! load shift count" %}
  ins_encode %{
    __ movl($tmp$$Register, $cnt$$constant);
    __ movdl($dst$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Byte vector shift
instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vextendbw $tmp,$src\n\t"
           "vshiftw $tmp,$shift\n\t"
           "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $dst,$tmp\n\t"
           "packuswb $dst,$dst\n\t! packed4B shift" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();

    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vextendbw $tmp,$src\n\t"
           "vshiftw $tmp,$shift\n\t"
           "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $dst,$tmp\n\t"
           "packuswb $dst,$dst\n\t! packed8B shift" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();

    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{
  predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vextendbw $tmp1,$src\n\t"
           "vshiftw $tmp1,$shift\n\t"
           "pshufd $tmp2,$src,0xE\n\t"
           "vextendbw $tmp2,$tmp2\n\t"
           "vshiftw $tmp2,$shift\n\t"
           "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp2,$dst\n\t"
           "pand $dst,$tmp1\n\t"
           "packuswb $dst,$tmp2\n\t! packed16B shift" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();

    __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vextendbw $tmp,$src\n\t"
           "vshiftw $tmp,$tmp,$shift\n\t"
           "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
           "vextracti128_high $dst,$tmp\n\t"
           "vpackuswb $dst,$tmp,$dst\n\t! packed16B shift" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();

    int vector_len = 1;
    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vextracti128_high $tmp,$src\n\t"
           "vextendbw $tmp,$tmp\n\t"
           "vextendbw $dst,$src\n\t"
           "vshiftw $tmp,$tmp,$shift\n\t"
           "vshiftw $dst,$dst,$shift\n\t"
           "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
           "vpand $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t"
           "vpackuswb $dst,$dst,$tmp\n\t"
           "vpermq $dst,$dst,0xD8\n\t! packed32B shift" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();

    int vector_len = 1;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vextracti64x4 $tmp1,$src\n\t"
           "vextendbw $tmp1,$tmp1\n\t"
           "vextendbw $tmp2,$src\n\t"
           "vshiftw $tmp1,$tmp1,$shift\n\t"
           "vshiftw $tmp2,$tmp2,$shift\n\t"
           "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t"
           "vpbroadcastd $dst,$dst\n\t"
           "vpand $tmp1,$tmp1,$dst\n\t"
           "vpand $tmp2,$tmp2,$dst\n\t"
           "vpackuswb $dst,$tmp1,$tmp2\n\t"
           "evmovdquq $tmp2, [0x0604020007050301]\n\t"
           "vpermq $dst,$tmp2,$dst\n\t! packed64B shift" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();

    int vector_len = 2;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts short values to int with
// sign extension before the shift. Char vectors are fine, since chars
// are unsigned values.
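// Illustrative example of the mismatch (a C++ sketch of the Java
// semantics; 'java' is what (short)(s >>> 2) computes, 'psrlw' what a
// 16-bit vector logical shift would compute):
//
//   short s     = -1;                                   // 0xffff
//   short java  = (short)((unsigned int)(int)s >> 2);   // low 16 bits of 0x3fffffff = -1
//   short psrlw = (short)((unsigned short)s >> 2);      // 0x3fff
//
// Also note: in the SSE (UseAVX == 0) paths below, the shift
// instructions are destructive two-operand forms, hence the explicit
// copy of $src into $dst (movflt/movdbl/movdqu) before shifting.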
// Shorts/Chars vector shift
instruct vshift2S(vecS dst, vecS src, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vshiftw $dst,$src,$shift\t! shift packed2S" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if (UseAVX == 0) {
      if ($dst$$XMMRegister != $src$$XMMRegister)
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    } else {
      int vector_len = 0;
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift4S(vecD dst, vecD src, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vshiftw $dst,$src,$shift\t! shift packed4S" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if (UseAVX == 0) {
      if ($dst$$XMMRegister != $src$$XMMRegister)
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    } else {
      int vector_len = 0;
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift8S(vecX dst, vecX src, vecS shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vshiftw $dst,$src,$shift\t! shift packed8S" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if (UseAVX == 0) {
      if ($dst$$XMMRegister != $src$$XMMRegister)
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    } else {
      int vector_len = 0;
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vshiftw $dst,$src,$shift\t! shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vshiftw $dst,$src,$shift\t! shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshift2I(vecD dst, vecD src, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vshiftd $dst,$src,$shift\t! shift packed2I" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if (UseAVX == 0) {
      if ($dst$$XMMRegister != $src$$XMMRegister)
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    } else {
      int vector_len = 0;
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift4I(vecX dst, vecX src, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vshiftd $dst,$src,$shift\t! shift packed4I" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if (UseAVX == 0) {
      if ($dst$$XMMRegister != $src$$XMMRegister)
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    } else {
      int vector_len = 0;
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift8I(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vshiftd $dst,$src,$shift\t! shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vshiftd $dst,$src,$shift\t! shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector shift
instruct vshift2L(vecX dst, vecX src, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift\t! shift packed2L" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if (UseAVX == 0) {
      if ($dst$$XMMRegister != $src$$XMMRegister)
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    } else {
      int vector_len = 0;
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift4L(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift\t! shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift\t! shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "movdqu $dst,$src\n\t"
            "psrlq $dst,$shift\n\t"
            "movdqu $tmp,[0x8000000000000000]\n\t"
            "psrlq $tmp,$shift\n\t"
            "pxor $dst,$tmp\n\t"
            "psubq $dst,$tmp\t! arithmetic right shift packed2L" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
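// Before AVX-512 (evpsraq) there is no 64-bit arithmetic right shift, so
// vsra2L_reg above and vsra4L_reg below emulate it with a logical shift
// plus a sign-extension fixup, using the identity
// (x >>s n) == ((x >>u n) ^ m) - m, where m = 0x8000000000000000 >>u n.
// A scalar sketch for unsigned 64-bit x and shift n (illustrative only):
//
//   uint64_t m = 0x8000000000000000ULL >> n;   // sign bit, shifted
//   uint64_t r = ((x >> n) ^ m) - m;           // == (int64_t)x >> n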
instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "movdqu $dst,$src\n\t"
            "psrlq $dst,$shift\n\t"
            "movdqu $tmp,[0x8000000000000000]\n\t"
            "psrlq $tmp,$shift\n\t"
            "pxor $dst,$tmp\n\t"
            "psubq $dst,$tmp\t! arithmetic right shift packed2L" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vpsrlq $dst,$src,$shift\n\t"
            "vmovdqu $tmp,[0x8000000000000000]\n\t"
            "vpsrlq $tmp,$tmp,$shift\n\t"
            "vpxor $dst,$dst,$tmp\n\t"
            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------
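
// Bitwise AND/OR/XOR have no notion of element width -- pand/vpand and
// friends operate on the whole 128/256/512-bit register -- so the variants
// below are keyed on length_in_bytes() alone rather than on element count.
// For two 16-byte vectors a and b, vpand computes, byte for byte:
//
//   for (int i = 0; i < 16; i++) dst[i] = a[i] & b[i];  // same bits for any lane size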

instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
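
// Note: the *_mem variants above fold a vector load straight into the AVX
// memory operand, so an IR shape like
//
//   Set dst (XorV src (LoadVector mem))
//
// becomes a single vpxor with a memory source instead of a separate load
// followed by a register-register vpxor. Only the AVX forms can do this;
// the legacy two-operand SSE forms match the register pattern only.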

// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabs4B_reg(vecS dst, vecS src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVB src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed4B" %}
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8B_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVB src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %}
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16B_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVB src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %}
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32B_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AbsVB src));
  format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs64B_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (AbsVB src));
  format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2S_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVS src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed2S" %}
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4S_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVS src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %}
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8S_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVS src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %}
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16S_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVS src));
  format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32S_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (AbsVS src));
  format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %}
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %}
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8I_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8); // 256-bit vpabsd requires AVX2
  match(Set dst (AbsVI src));
  format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16I_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVI src));
  format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2L_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4L_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8L_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed2D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if ($dst$$XMMRegister != $src$$XMMRegister)
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ vabsnegd(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed4D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 1;
    __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg8D(vecZ dst, vecZ src, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed8D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 2;
    __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg2F(vecD dst, vecD src, rRegI scratch) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed2F" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if ($dst$$XMMRegister != $src$$XMMRegister)
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vecX dst, rRegI scratch) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg8F(vecY dst, vecY src, rRegI scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed8F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 1;
    __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg16F(vecZ dst, vecZ src, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed16F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 2;
    __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
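
// Each FMA instruct matches the ideal shape (Set c (FmaVD c (Binary a b))),
// i.e. c = a * b + c with c doubling as the destination. That shape maps
// naturally onto the vfmadd231pd/vfmadd231ps "231" encodings, where the
// destination register is also the addend, so no extra move is needed. The
// result is a single correctly-rounded operation: fma(a, b, c) rounds once,
// unlike a separate multiply and add, which round twice.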

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------
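
// MulAddVS2VI is pmaddwd/vpmaddwd: multiply adjacent pairs of signed 16-bit
// elements and sum each pair of 32-bit products into one 32-bit lane, halving
// the element count (hence "packed4Sto2I" and friends). Per output lane i:
//
//   dst[i] = (int)src1[2*i] * src2[2*i] + (int)src1[2*i+1] * src2[2*i+1];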

instruct smuladd4S2I_reg(vecD dst, vecD src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct smuladd8S4I_reg(vecX dst, vecX src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------
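
// On AVX-512 VNNI hardware, a pmaddwd followed by an integer add -- the
// ideal shape (AddVI (MulAddVS2VI src1 src2) dst) -- fuses into a single
// evpdpwssd that accumulates the pair-products directly into $dst. The
// artificially low ins_cost(10) below biases matching toward this fused
// form over the separate vpmaddwd + vpaddd instructs.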

instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
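
// Note: PopCountVI matches only when both the UsePopCountInstruction flag is
// set and the CPU reports the AVX-512 VPOPCNTDQ extension. vpopcntd counts
// the set bits of each 32-bit lane independently; e.g. a lane holding
// 0x000000FF yields 8.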