//
// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 32-bit words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX-enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
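//
// As a worked reading of the "reg_def" fields above (an illustrative note,
// not part of the original header): the definition
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
// found below declares the second 32-bit word of xmm0 as save-on-call for
// both the allocator and the C calling convention, spilled and reloaded as
// a float (Op_RegF), with hardware encoding 0 (the encoding names the whole
// xmm0 register; the ->next(1) VMReg selects the word within it).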
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls (XMM16-XMM31 are volatile)
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
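
// Illustrative summary (not part of the original file): the register classes
// below select the 32-bit slices an operand occupies, so an N-bit value takes
// N/32 consecutive slices of one XMM register:
//   float  (32 bits)  -> XMM0                 (1 slice)
//   double (64 bits)  -> XMM0, XMM0b          (2 slices)
//   128-bit vector    -> XMM0 .. XMM0d        (4 slices)
//   256-bit vector    -> XMM0 .. XMM0h        (8 slices)
//   512-bit vector    -> XMM0 .. XMM0p        (16 slices)
// The *_legacy/*_evex pairs and the reg_class_dynamic selections further down
// choose between the XMM0-XMM15 and XMM0-XMM31 variants at runtime via
// VM_Version::supports_evex() and related predicates.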
// The flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre-EVEX 32-bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for EVEX 32-bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 64-bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for EVEX 64-bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 128-bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for EVEX 128-bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 256-bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for EVEX 256-bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512-bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1102 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1103 1104 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1105 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1106 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1107 1108 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1109 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1110 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1111 1112 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1113 reg_class ymm3_reg(XMM3, XMM3b, 
XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1114 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1115 1116 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1117 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1118 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1119 1120 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1121 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1122 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1123 1124 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1125 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1126 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1127 1128 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1129 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1130 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1131 1132 #ifdef _LP64 1133 1134 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1135 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1136 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1137 1138 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1139 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1140 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1141 1142 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1143 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1144 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1145 1146 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1147 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1148 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1149 1150 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1151 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1152 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1153 1154 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1155 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1156 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1157 1158 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1159 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1160 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1161 1162 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1163 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1164 reg_class zmm15_reg(XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1165 1166 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1167 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1168 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1169 1170 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1171 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1172 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1173 1174 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1175 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1176 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1177 1178 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1179 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1180 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1181 1182 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1183 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1184 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1185 1186 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1187 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1188 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1189 1190 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1191 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1192 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1193 1194 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1195 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1196 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1197 1198 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1199 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1200 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1201 1202 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1203 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1204 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1205 1206 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1207 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1208 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1209 1210 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1211 reg_class ymm27_reg(XMM27, 
XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---< Used for optimization in Compile::shorten_branches >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5-byte instructions plus one move for an unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}
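
// A minimal sketch (editorial illustration, not emitted code) of the PC trick
// used above on LP64: a near CALL pushes the address of the next instruction,
// so calling the very next label and then subtracting the bytes emitted since
// the handler started leaves exactly "the_pc" on the stack without clobbering
// any potentially live register:
//
//     call  next          // pushes the address of "next"
//   next:
//     subq  [rsp], N      // N = bytes from the handler's start to "next"
//     jmp   deopt_blob    // [rsp] now holds the handler's starting PC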

//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
    case Op_MulAddVS2VI:
      if (UseSSE < 2)
        ret_value = false;
      break;
  }

  return ret_value;  // Match rules are supported by default.
}
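
// Worked example (illustrative): with UseSSE == 3 and UseAVX == 0, the switch
// above rejects Op_MulVI (it needs SSE4.1 or AVX) but keeps Op_AddReductionVI
// (SSE3 is enough). Opcodes without a case label fall through the switch and
// stay supported, provided a match rule exists for them at all.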

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // Identify extra cases that we might want to provide match rules for,
  // e.g. Op_* vector nodes and other intrinsics, while guarding with vlen.
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // Match rules are supported by default.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}
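
// Worked example (illustrative): with UseAVX == 2 and MaxVectorSize == 32,
// vector_width_in_bytes(T_INT) is (1 << 2) * 8 = 32 bytes, so
// max_vector_size(T_INT) is 32 / 4 = 8 elements and min_vector_size(T_INT)
// is 2 elements (an 8-byte vector). Byte elements bottom out at 4, matching
// the 4-byte minimum vector above.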

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}
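
// Example of what clone_shift enables (illustrative): for an address such as
//   array_base + (ConvI2L(index) << 3) + 16
// the shift (and, on LP64, a provably non-negative ConvI2L feeding it) is
// cloned at each address use instead of being forced into a register, so the
// matcher can fold the whole expression into a single addressing mode, e.g.
//   movq rax, [rbase + rindex*8 + 16]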

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
#ifndef LP64
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        // zero dst first, then insert the 128-bit payload
        __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
        __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecY:
#ifndef LP64
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
        __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
#ifndef LP64
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          // zero dst first, then insert the 128-bit payload
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef LP64
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
#ifndef LP64
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextracti32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef LP64
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextracti64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2 ) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}
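
// Sizing note (illustrative): EVEX scales 8-bit displacements by the operand
// tuple size ("disp8*N"), which is what query_compressed_disp_byte() models
// above. For example, an Op_VecZ spill at stack_offset == 128 still fits a
// one-byte displacement under EVEX (128 == 2 * 64), while the legacy SSE/VEX
// encodings would need a four-byte displacement for the same offset.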

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}
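
// Examples (illustrative):
//   replicate4_imm(0x1F, 1) == 0x1F1F1F1F          (byte broadcast to 32 bits)
//   replicate4_imm(-1, 2)   == 0xFFFFFFFF          (short broadcast; sign bits masked first)
//   replicate8_imm(0xAB, 2) == 0x00AB00AB00AB00AB  (short broadcast to 64 bits)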

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_vl));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    // TODO: the ADLC cannot compile this operand without the next two lines; omitting them fails with:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
    overflow     (0x20, "o");  // not really supported by the instruction
    no_overflow  (0x21, "no"); // not really supported by the instruction
  %}
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "ud2\t# ShouldNotReachHere" %}
  ins_encode %{
    __ ud2();
  %}
  ins_pipe(pipe_slow);
%}

// =================================EVEX special===============================

instruct setMask(rRegI dst, rRegI src) %{
  predicate(Matcher::has_predicated_vectors());
  match(Set dst (SetVectMaskI src));
  effect(TEMP dst);
  format %{ "setvectmask $dst, $src" %}
  ins_encode %{
    __ setvectmask($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __
vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2124 %} 2125 ins_pipe(pipe_slow); 2126 %} 2127 2128 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2129 predicate(UseAVX > 0); 2130 match(Set dst (AddF src con)); 2131 2132 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2133 ins_cost(150); 2134 ins_encode %{ 2135 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2136 %} 2137 ins_pipe(pipe_slow); 2138 %} 2139 2140 instruct addD_reg(regD dst, regD src) %{ 2141 predicate((UseSSE>=2) && (UseAVX == 0)); 2142 match(Set dst (AddD dst src)); 2143 2144 format %{ "addsd $dst, $src" %} 2145 ins_cost(150); 2146 ins_encode %{ 2147 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2148 %} 2149 ins_pipe(pipe_slow); 2150 %} 2151 2152 instruct addD_mem(regD dst, memory src) %{ 2153 predicate((UseSSE>=2) && (UseAVX == 0)); 2154 match(Set dst (AddD dst (LoadD src))); 2155 2156 format %{ "addsd $dst, $src" %} 2157 ins_cost(150); 2158 ins_encode %{ 2159 __ addsd($dst$$XMMRegister, $src$$Address); 2160 %} 2161 ins_pipe(pipe_slow); 2162 %} 2163 2164 instruct addD_imm(regD dst, immD con) %{ 2165 predicate((UseSSE>=2) && (UseAVX == 0)); 2166 match(Set dst (AddD dst con)); 2167 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2168 ins_cost(150); 2169 ins_encode %{ 2170 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2171 %} 2172 ins_pipe(pipe_slow); 2173 %} 2174 2175 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2176 predicate(UseAVX > 0); 2177 match(Set dst (AddD src1 src2)); 2178 2179 format %{ "vaddsd $dst, $src1, $src2" %} 2180 ins_cost(150); 2181 ins_encode %{ 2182 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2183 %} 2184 ins_pipe(pipe_slow); 2185 %} 2186 2187 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2188 predicate(UseAVX > 0); 2189 match(Set dst (AddD src1 (LoadD src2))); 2190 2191 format %{ "vaddsd $dst, $src1, $src2" %} 2192 ins_cost(150); 2193 ins_encode %{ 2194 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2195 %} 2196 ins_pipe(pipe_slow); 2197 %} 2198 2199 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2200 predicate(UseAVX > 0); 2201 match(Set dst (AddD src con)); 2202 2203 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2204 ins_cost(150); 2205 ins_encode %{ 2206 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2207 %} 2208 ins_pipe(pipe_slow); 2209 %} 2210 2211 instruct subF_reg(regF dst, regF src) %{ 2212 predicate((UseSSE>=1) && (UseAVX == 0)); 2213 match(Set dst (SubF dst src)); 2214 2215 format %{ "subss $dst, $src" %} 2216 ins_cost(150); 2217 ins_encode %{ 2218 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2219 %} 2220 ins_pipe(pipe_slow); 2221 %} 2222 2223 instruct subF_mem(regF dst, memory src) %{ 2224 predicate((UseSSE>=1) && (UseAVX == 0)); 2225 match(Set dst (SubF dst (LoadF src))); 2226 2227 format %{ "subss $dst, $src" %} 2228 ins_cost(150); 2229 ins_encode %{ 2230 __ subss($dst$$XMMRegister, $src$$Address); 2231 %} 2232 ins_pipe(pipe_slow); 2233 %} 2234 2235 instruct subF_imm(regF dst, immF con) %{ 2236 predicate((UseSSE>=1) && (UseAVX == 0)); 2237 match(Set dst (SubF dst con)); 2238 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2239 ins_cost(150); 2240 ins_encode %{ 2241 __ subss($dst$$XMMRegister, $constantaddress($con)); 2242 %} 2243 ins_pipe(pipe_slow); 2244 %} 2245 
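
// Note on the rule split used throughout this section (illustrative): the
// UseSSE rules emit the destructive two-operand forms ("subss xmm0, xmm1"
// overwrites xmm0), which is why their match rules tie $dst to the first
// input. The UseAVX > 0 rules emit the VEX three-operand forms
// ("vsubss xmm2, xmm0, xmm1"), which leave both sources intact and let the
// register allocator pick $dst freely.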
2246 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2247 predicate(UseAVX > 0); 2248 match(Set dst (SubF src1 src2)); 2249 2250 format %{ "vsubss $dst, $src1, $src2" %} 2251 ins_cost(150); 2252 ins_encode %{ 2253 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2254 %} 2255 ins_pipe(pipe_slow); 2256 %} 2257 2258 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2259 predicate(UseAVX > 0); 2260 match(Set dst (SubF src1 (LoadF src2))); 2261 2262 format %{ "vsubss $dst, $src1, $src2" %} 2263 ins_cost(150); 2264 ins_encode %{ 2265 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2266 %} 2267 ins_pipe(pipe_slow); 2268 %} 2269 2270 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2271 predicate(UseAVX > 0); 2272 match(Set dst (SubF src con)); 2273 2274 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2275 ins_cost(150); 2276 ins_encode %{ 2277 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2278 %} 2279 ins_pipe(pipe_slow); 2280 %} 2281 2282 instruct subD_reg(regD dst, regD src) %{ 2283 predicate((UseSSE>=2) && (UseAVX == 0)); 2284 match(Set dst (SubD dst src)); 2285 2286 format %{ "subsd $dst, $src" %} 2287 ins_cost(150); 2288 ins_encode %{ 2289 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2290 %} 2291 ins_pipe(pipe_slow); 2292 %} 2293 2294 instruct subD_mem(regD dst, memory src) %{ 2295 predicate((UseSSE>=2) && (UseAVX == 0)); 2296 match(Set dst (SubD dst (LoadD src))); 2297 2298 format %{ "subsd $dst, $src" %} 2299 ins_cost(150); 2300 ins_encode %{ 2301 __ subsd($dst$$XMMRegister, $src$$Address); 2302 %} 2303 ins_pipe(pipe_slow); 2304 %} 2305 2306 instruct subD_imm(regD dst, immD con) %{ 2307 predicate((UseSSE>=2) && (UseAVX == 0)); 2308 match(Set dst (SubD dst con)); 2309 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2310 ins_cost(150); 2311 ins_encode %{ 2312 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2313 %} 2314 ins_pipe(pipe_slow); 2315 %} 2316 2317 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2318 predicate(UseAVX > 0); 2319 match(Set dst (SubD src1 src2)); 2320 2321 format %{ "vsubsd $dst, $src1, $src2" %} 2322 ins_cost(150); 2323 ins_encode %{ 2324 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2325 %} 2326 ins_pipe(pipe_slow); 2327 %} 2328 2329 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2330 predicate(UseAVX > 0); 2331 match(Set dst (SubD src1 (LoadD src2))); 2332 2333 format %{ "vsubsd $dst, $src1, $src2" %} 2334 ins_cost(150); 2335 ins_encode %{ 2336 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2337 %} 2338 ins_pipe(pipe_slow); 2339 %} 2340 2341 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2342 predicate(UseAVX > 0); 2343 match(Set dst (SubD src con)); 2344 2345 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2346 ins_cost(150); 2347 ins_encode %{ 2348 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2349 %} 2350 ins_pipe(pipe_slow); 2351 %} 2352 2353 instruct mulF_reg(regF dst, regF src) %{ 2354 predicate((UseSSE>=1) && (UseAVX == 0)); 2355 match(Set dst (MulF dst src)); 2356 2357 format %{ "mulss $dst, $src" %} 2358 ins_cost(150); 2359 ins_encode %{ 2360 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2361 %} 2362 ins_pipe(pipe_slow); 2363 %} 2364 2365 instruct mulF_mem(regF dst, memory src) %{ 2366 predicate((UseSSE>=1) && (UseAVX == 0)); 2367 
match(Set dst (MulF dst (LoadF src))); 2368 2369 format %{ "mulss $dst, $src" %} 2370 ins_cost(150); 2371 ins_encode %{ 2372 __ mulss($dst$$XMMRegister, $src$$Address); 2373 %} 2374 ins_pipe(pipe_slow); 2375 %} 2376 2377 instruct mulF_imm(regF dst, immF con) %{ 2378 predicate((UseSSE>=1) && (UseAVX == 0)); 2379 match(Set dst (MulF dst con)); 2380 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2381 ins_cost(150); 2382 ins_encode %{ 2383 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2384 %} 2385 ins_pipe(pipe_slow); 2386 %} 2387 2388 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2389 predicate(UseAVX > 0); 2390 match(Set dst (MulF src1 src2)); 2391 2392 format %{ "vmulss $dst, $src1, $src2" %} 2393 ins_cost(150); 2394 ins_encode %{ 2395 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2396 %} 2397 ins_pipe(pipe_slow); 2398 %} 2399 2400 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2401 predicate(UseAVX > 0); 2402 match(Set dst (MulF src1 (LoadF src2))); 2403 2404 format %{ "vmulss $dst, $src1, $src2" %} 2405 ins_cost(150); 2406 ins_encode %{ 2407 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2408 %} 2409 ins_pipe(pipe_slow); 2410 %} 2411 2412 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2413 predicate(UseAVX > 0); 2414 match(Set dst (MulF src con)); 2415 2416 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2417 ins_cost(150); 2418 ins_encode %{ 2419 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2420 %} 2421 ins_pipe(pipe_slow); 2422 %} 2423 2424 instruct mulD_reg(regD dst, regD src) %{ 2425 predicate((UseSSE>=2) && (UseAVX == 0)); 2426 match(Set dst (MulD dst src)); 2427 2428 format %{ "mulsd $dst, $src" %} 2429 ins_cost(150); 2430 ins_encode %{ 2431 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2432 %} 2433 ins_pipe(pipe_slow); 2434 %} 2435 2436 instruct mulD_mem(regD dst, memory src) %{ 2437 predicate((UseSSE>=2) && (UseAVX == 0)); 2438 match(Set dst (MulD dst (LoadD src))); 2439 2440 format %{ "mulsd $dst, $src" %} 2441 ins_cost(150); 2442 ins_encode %{ 2443 __ mulsd($dst$$XMMRegister, $src$$Address); 2444 %} 2445 ins_pipe(pipe_slow); 2446 %} 2447 2448 instruct mulD_imm(regD dst, immD con) %{ 2449 predicate((UseSSE>=2) && (UseAVX == 0)); 2450 match(Set dst (MulD dst con)); 2451 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2452 ins_cost(150); 2453 ins_encode %{ 2454 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2455 %} 2456 ins_pipe(pipe_slow); 2457 %} 2458 2459 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2460 predicate(UseAVX > 0); 2461 match(Set dst (MulD src1 src2)); 2462 2463 format %{ "vmulsd $dst, $src1, $src2" %} 2464 ins_cost(150); 2465 ins_encode %{ 2466 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2467 %} 2468 ins_pipe(pipe_slow); 2469 %} 2470 2471 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2472 predicate(UseAVX > 0); 2473 match(Set dst (MulD src1 (LoadD src2))); 2474 2475 format %{ "vmulsd $dst, $src1, $src2" %} 2476 ins_cost(150); 2477 ins_encode %{ 2478 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2479 %} 2480 ins_pipe(pipe_slow); 2481 %} 2482 2483 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2484 predicate(UseAVX > 0); 2485 match(Set dst (MulD src con)); 2486 2487 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" 
%} 2488 ins_cost(150); 2489 ins_encode %{ 2490 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2491 %} 2492 ins_pipe(pipe_slow); 2493 %} 2494 2495 instruct divF_reg(regF dst, regF src) %{ 2496 predicate((UseSSE>=1) && (UseAVX == 0)); 2497 match(Set dst (DivF dst src)); 2498 2499 format %{ "divss $dst, $src" %} 2500 ins_cost(150); 2501 ins_encode %{ 2502 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2503 %} 2504 ins_pipe(pipe_slow); 2505 %} 2506 2507 instruct divF_mem(regF dst, memory src) %{ 2508 predicate((UseSSE>=1) && (UseAVX == 0)); 2509 match(Set dst (DivF dst (LoadF src))); 2510 2511 format %{ "divss $dst, $src" %} 2512 ins_cost(150); 2513 ins_encode %{ 2514 __ divss($dst$$XMMRegister, $src$$Address); 2515 %} 2516 ins_pipe(pipe_slow); 2517 %} 2518 2519 instruct divF_imm(regF dst, immF con) %{ 2520 predicate((UseSSE>=1) && (UseAVX == 0)); 2521 match(Set dst (DivF dst con)); 2522 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2523 ins_cost(150); 2524 ins_encode %{ 2525 __ divss($dst$$XMMRegister, $constantaddress($con)); 2526 %} 2527 ins_pipe(pipe_slow); 2528 %} 2529 2530 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2531 predicate(UseAVX > 0); 2532 match(Set dst (DivF src1 src2)); 2533 2534 format %{ "vdivss $dst, $src1, $src2" %} 2535 ins_cost(150); 2536 ins_encode %{ 2537 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2538 %} 2539 ins_pipe(pipe_slow); 2540 %} 2541 2542 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2543 predicate(UseAVX > 0); 2544 match(Set dst (DivF src1 (LoadF src2))); 2545 2546 format %{ "vdivss $dst, $src1, $src2" %} 2547 ins_cost(150); 2548 ins_encode %{ 2549 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2550 %} 2551 ins_pipe(pipe_slow); 2552 %} 2553 2554 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2555 predicate(UseAVX > 0); 2556 match(Set dst (DivF src con)); 2557 2558 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2559 ins_cost(150); 2560 ins_encode %{ 2561 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2562 %} 2563 ins_pipe(pipe_slow); 2564 %} 2565 2566 instruct divD_reg(regD dst, regD src) %{ 2567 predicate((UseSSE>=2) && (UseAVX == 0)); 2568 match(Set dst (DivD dst src)); 2569 2570 format %{ "divsd $dst, $src" %} 2571 ins_cost(150); 2572 ins_encode %{ 2573 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2574 %} 2575 ins_pipe(pipe_slow); 2576 %} 2577 2578 instruct divD_mem(regD dst, memory src) %{ 2579 predicate((UseSSE>=2) && (UseAVX == 0)); 2580 match(Set dst (DivD dst (LoadD src))); 2581 2582 format %{ "divsd $dst, $src" %} 2583 ins_cost(150); 2584 ins_encode %{ 2585 __ divsd($dst$$XMMRegister, $src$$Address); 2586 %} 2587 ins_pipe(pipe_slow); 2588 %} 2589 2590 instruct divD_imm(regD dst, immD con) %{ 2591 predicate((UseSSE>=2) && (UseAVX == 0)); 2592 match(Set dst (DivD dst con)); 2593 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2594 ins_cost(150); 2595 ins_encode %{ 2596 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2597 %} 2598 ins_pipe(pipe_slow); 2599 %} 2600 2601 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2602 predicate(UseAVX > 0); 2603 match(Set dst (DivD src1 src2)); 2604 2605 format %{ "vdivsd $dst, $src1, $src2" %} 2606 ins_cost(150); 2607 ins_encode %{ 2608 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2609 %} 2610 ins_pipe(pipe_slow); 2611 %} 
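
// Note (illustrative): rules matching (DivD src1 (LoadD src2)), like the one
// below, fold the memory operand straight into the instruction, e.g.
//   vdivsd xmm0, xmm1, [rsp + 8]
// saving the separate load that the reg-reg rule would otherwise require.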
2612 2613 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2614 predicate(UseAVX > 0); 2615 match(Set dst (DivD src1 (LoadD src2))); 2616 2617 format %{ "vdivsd $dst, $src1, $src2" %} 2618 ins_cost(150); 2619 ins_encode %{ 2620 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2621 %} 2622 ins_pipe(pipe_slow); 2623 %} 2624 2625 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2626 predicate(UseAVX > 0); 2627 match(Set dst (DivD src con)); 2628 2629 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2630 ins_cost(150); 2631 ins_encode %{ 2632 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2633 %} 2634 ins_pipe(pipe_slow); 2635 %} 2636 2637 instruct absF_reg(regF dst) %{ 2638 predicate((UseSSE>=1) && (UseAVX == 0)); 2639 match(Set dst (AbsF dst)); 2640 ins_cost(150); 2641 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2642 ins_encode %{ 2643 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2644 %} 2645 ins_pipe(pipe_slow); 2646 %} 2647 2648 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2649 predicate(UseAVX > 0); 2650 match(Set dst (AbsF src)); 2651 ins_cost(150); 2652 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2653 ins_encode %{ 2654 int vector_len = 0; 2655 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2656 ExternalAddress(float_signmask()), vector_len); 2657 %} 2658 ins_pipe(pipe_slow); 2659 %} 2660 2661 instruct absD_reg(regD dst) %{ 2662 predicate((UseSSE>=2) && (UseAVX == 0)); 2663 match(Set dst (AbsD dst)); 2664 ins_cost(150); 2665 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2666 "# abs double by sign masking" %} 2667 ins_encode %{ 2668 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2669 %} 2670 ins_pipe(pipe_slow); 2671 %} 2672 2673 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2674 predicate(UseAVX > 0); 2675 match(Set dst (AbsD src)); 2676 ins_cost(150); 2677 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2678 "# abs double by sign masking" %} 2679 ins_encode %{ 2680 int vector_len = 0; 2681 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2682 ExternalAddress(double_signmask()), vector_len); 2683 %} 2684 ins_pipe(pipe_slow); 2685 %} 2686 2687 instruct negF_reg(regF dst) %{ 2688 predicate((UseSSE>=1) && (UseAVX == 0)); 2689 match(Set dst (NegF dst)); 2690 ins_cost(150); 2691 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2692 ins_encode %{ 2693 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2694 %} 2695 ins_pipe(pipe_slow); 2696 %} 2697 2698 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2699 predicate(UseAVX > 0); 2700 match(Set dst (NegF src)); 2701 ins_cost(150); 2702 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2703 ins_encode %{ 2704 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2705 ExternalAddress(float_signflip())); 2706 %} 2707 ins_pipe(pipe_slow); 2708 %} 2709 2710 instruct negD_reg(regD dst) %{ 2711 predicate((UseSSE>=2) && (UseAVX == 0)); 2712 match(Set dst (NegD dst)); 2713 ins_cost(150); 2714 format %{ "xorpd $dst, [0x8000000000000000]\t" 2715 "# neg double by sign flipping" %} 2716 ins_encode %{ 2717 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2718 %} 2719 ins_pipe(pipe_slow); 2720 %} 2721 2722 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 2723 predicate(UseAVX > 0); 2724 match(Set dst (NegD src)); 2725 ins_cost(150); 2726 format %{ "vnegatesd $dst, $src, 
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
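
// The Fma patterns above emit a fused multiply-add ($c = $a * $b + $c)
// that rounds once instead of twice, so the result can differ in the
// last bit from a separate multiply and add; they are only selected
// when UseFMA is enabled.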
// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (4 bytes long)
instruct MoveVecS2Leg(legVecS dst, vecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (4 bytes long)
instruct MoveLeg2VecS(vecS dst, legVecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (8 bytes long)
instruct MoveVecD2Leg(legVecD dst, vecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (8 bytes long)
instruct MoveLeg2VecD(vecD dst, legVecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (16 bytes long)
instruct MoveVecX2Leg(legVecX dst, vecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (16 bytes long)
instruct MoveLeg2VecX(vecX dst, legVecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
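
// Note on the MoveVec*2Leg/MoveLeg2Vec* pairs: when AVX-512 is enabled
// (UseAVX > 2) but the target lacks AVX512VL, the extended registers
// can only be reached with full 512-bit EVEX encodings, hence the
// evmovdquq with vector_len = 2 in the branches above. The vector_len
// arguments used throughout appear to follow the assembler's AVX
// length encoding: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.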
// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (32 bytes long)
instruct MoveVecY2Leg(legVecY dst, vecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (32 bytes long)
instruct MoveLeg2VecY(vecY dst, legVecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
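
// The two 64-byte load shapes above differ only in element granularity:
// vectors whose elements are at most 4 bytes wide go through evmovdqul
// (doubleword form) and wider elements through evmovdquq (quadword
// form), as selected by the element_size() predicate.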
// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE=======================================
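
// The replicate patterns come in three flavors. This legacy section
// synthesizes broadcasts out of shuffle sequences for targets without
// the relevant AVX-512 subfeatures; the generic section further below
// carries the patterns keyed only on vector length; and the EVEX
// section at the end uses the single-instruction vpbroadcast forms
// when AVX512VL/BW are available.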
replicate16B" %} 3143 ins_encode %{ 3144 __ movdl($dst$$XMMRegister, $src$$Register); 3145 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3146 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3147 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3148 %} 3149 ins_pipe( pipe_slow ); 3150 %} 3151 3152 instruct Repl16B_mem(vecX dst, memory mem) %{ 3153 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3154 match(Set dst (ReplicateB (LoadB mem))); 3155 format %{ "punpcklbw $dst,$mem\n\t" 3156 "pshuflw $dst,$dst,0x00\n\t" 3157 "punpcklqdq $dst,$dst\t! replicate16B" %} 3158 ins_encode %{ 3159 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3160 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3161 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3162 %} 3163 ins_pipe( pipe_slow ); 3164 %} 3165 3166 instruct Repl32B(vecY dst, rRegI src) %{ 3167 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3168 match(Set dst (ReplicateB src)); 3169 format %{ "movd $dst,$src\n\t" 3170 "punpcklbw $dst,$dst\n\t" 3171 "pshuflw $dst,$dst,0x00\n\t" 3172 "punpcklqdq $dst,$dst\n\t" 3173 "vinserti128_high $dst,$dst\t! replicate32B" %} 3174 ins_encode %{ 3175 __ movdl($dst$$XMMRegister, $src$$Register); 3176 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3177 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3178 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3179 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3180 %} 3181 ins_pipe( pipe_slow ); 3182 %} 3183 3184 instruct Repl32B_mem(vecY dst, memory mem) %{ 3185 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3186 match(Set dst (ReplicateB (LoadB mem))); 3187 format %{ "punpcklbw $dst,$mem\n\t" 3188 "pshuflw $dst,$dst,0x00\n\t" 3189 "punpcklqdq $dst,$dst\n\t" 3190 "vinserti128_high $dst,$dst\t! replicate32B" %} 3191 ins_encode %{ 3192 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3193 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3194 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3195 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3196 %} 3197 ins_pipe( pipe_slow ); 3198 %} 3199 3200 instruct Repl64B(legVecZ dst, rRegI src) %{ 3201 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3202 match(Set dst (ReplicateB src)); 3203 format %{ "movd $dst,$src\n\t" 3204 "punpcklbw $dst,$dst\n\t" 3205 "pshuflw $dst,$dst,0x00\n\t" 3206 "punpcklqdq $dst,$dst\n\t" 3207 "vinserti128_high $dst,$dst\t" 3208 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} 3209 ins_encode %{ 3210 __ movdl($dst$$XMMRegister, $src$$Register); 3211 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3212 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3213 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3214 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3215 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3216 %} 3217 ins_pipe( pipe_slow ); 3218 %} 3219 3220 instruct Repl64B_mem(legVecZ dst, memory mem) %{ 3221 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3222 match(Set dst (ReplicateB (LoadB mem))); 3223 format %{ "punpcklbw $dst,$mem\n\t" 3224 "pshuflw $dst,$dst,0x00\n\t" 3225 "punpcklqdq $dst,$dst\n\t" 3226 "vinserti128_high $dst,$dst\t" 3227 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate64B" %} 3228 ins_encode %{ 3229 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3230 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3231 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3232 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3233 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3234 %} 3235 ins_pipe( pipe_slow ); 3236 %} 3237 3238 instruct Repl16B_imm(vecX dst, immI con) %{ 3239 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3240 match(Set dst (ReplicateB con)); 3241 format %{ "movq $dst,[$constantaddress]\n\t" 3242 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3243 ins_encode %{ 3244 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3245 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3246 %} 3247 ins_pipe( pipe_slow ); 3248 %} 3249 3250 instruct Repl32B_imm(vecY dst, immI con) %{ 3251 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3252 match(Set dst (ReplicateB con)); 3253 format %{ "movq $dst,[$constantaddress]\n\t" 3254 "punpcklqdq $dst,$dst\n\t" 3255 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3256 ins_encode %{ 3257 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3258 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3259 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3260 %} 3261 ins_pipe( pipe_slow ); 3262 %} 3263 3264 instruct Repl64B_imm(legVecZ dst, immI con) %{ 3265 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3266 match(Set dst (ReplicateB con)); 3267 format %{ "movq $dst,[$constantaddress]\n\t" 3268 "punpcklqdq $dst,$dst\n\t" 3269 "vinserti128_high $dst,$dst\t" 3270 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %} 3271 ins_encode %{ 3272 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3273 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3274 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3275 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3276 %} 3277 ins_pipe( pipe_slow ); 3278 %} 3279 3280 instruct Repl4S(vecD dst, rRegI src) %{ 3281 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3282 match(Set dst (ReplicateS src)); 3283 format %{ "movd $dst,$src\n\t" 3284 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3285 ins_encode %{ 3286 __ movdl($dst$$XMMRegister, $src$$Register); 3287 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3288 %} 3289 ins_pipe( pipe_slow ); 3290 %} 3291 3292 instruct Repl4S_mem(vecD dst, memory mem) %{ 3293 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3294 match(Set dst (ReplicateS (LoadS mem))); 3295 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3296 ins_encode %{ 3297 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3298 %} 3299 ins_pipe( pipe_slow ); 3300 %} 3301 3302 instruct Repl8S(vecX dst, rRegI src) %{ 3303 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3304 match(Set dst (ReplicateS src)); 3305 format %{ "movd $dst,$src\n\t" 3306 "pshuflw $dst,$dst,0x00\n\t" 3307 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3308 ins_encode %{ 3309 __ movdl($dst$$XMMRegister, $src$$Register); 3310 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3311 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3312 %} 3313 ins_pipe( pipe_slow ); 3314 %} 3315 3316 instruct Repl8S_mem(vecX dst, memory mem) %{ 3317 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3318 match(Set dst (ReplicateS (LoadS mem))); 3319 format %{ "pshuflw $dst,$mem,0x00\n\t" 3320 "punpcklqdq $dst,$dst\t! replicate8S" %} 3321 ins_encode %{ 3322 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3323 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3324 %} 3325 ins_pipe( pipe_slow ); 3326 %} 3327 3328 instruct Repl8S_imm(vecX dst, immI con) %{ 3329 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3330 match(Set dst (ReplicateS con)); 3331 format %{ "movq $dst,[$constantaddress]\n\t" 3332 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3333 ins_encode %{ 3334 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3335 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3336 %} 3337 ins_pipe( pipe_slow ); 3338 %} 3339 3340 instruct Repl16S(vecY dst, rRegI src) %{ 3341 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3342 match(Set dst (ReplicateS src)); 3343 format %{ "movd $dst,$src\n\t" 3344 "pshuflw $dst,$dst,0x00\n\t" 3345 "punpcklqdq $dst,$dst\n\t" 3346 "vinserti128_high $dst,$dst\t! replicate16S" %} 3347 ins_encode %{ 3348 __ movdl($dst$$XMMRegister, $src$$Register); 3349 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3350 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3351 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3352 %} 3353 ins_pipe( pipe_slow ); 3354 %} 3355 3356 instruct Repl16S_mem(vecY dst, memory mem) %{ 3357 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3358 match(Set dst (ReplicateS (LoadS mem))); 3359 format %{ "pshuflw $dst,$mem,0x00\n\t" 3360 "punpcklqdq $dst,$dst\n\t" 3361 "vinserti128_high $dst,$dst\t! replicate16S" %} 3362 ins_encode %{ 3363 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3364 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3365 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3366 %} 3367 ins_pipe( pipe_slow ); 3368 %} 3369 3370 instruct Repl16S_imm(vecY dst, immI con) %{ 3371 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3372 match(Set dst (ReplicateS con)); 3373 format %{ "movq $dst,[$constantaddress]\n\t" 3374 "punpcklqdq $dst,$dst\n\t" 3375 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3376 ins_encode %{ 3377 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3378 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3379 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3380 %} 3381 ins_pipe( pipe_slow ); 3382 %} 3383 3384 instruct Repl32S(legVecZ dst, rRegI src) %{ 3385 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3386 match(Set dst (ReplicateS src)); 3387 format %{ "movd $dst,$src\n\t" 3388 "pshuflw $dst,$dst,0x00\n\t" 3389 "punpcklqdq $dst,$dst\n\t" 3390 "vinserti128_high $dst,$dst\t" 3391 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate32S" %} 3392 ins_encode %{ 3393 __ movdl($dst$$XMMRegister, $src$$Register); 3394 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3395 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3396 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3397 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3398 %} 3399 ins_pipe( pipe_slow ); 3400 %} 3401 3402 instruct Repl32S_mem(legVecZ dst, memory mem) %{ 3403 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3404 match(Set dst (ReplicateS (LoadS mem))); 3405 format %{ "pshuflw $dst,$mem,0x00\n\t" 3406 "punpcklqdq $dst,$dst\n\t" 3407 "vinserti128_high $dst,$dst\t" 3408 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3409 ins_encode %{ 3410 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3411 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3412 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3413 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3414 %} 3415 ins_pipe( pipe_slow ); 3416 %} 3417 3418 instruct Repl32S_imm(legVecZ dst, immI con) %{ 3419 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3420 match(Set dst (ReplicateS con)); 3421 format %{ "movq $dst,[$constantaddress]\n\t" 3422 "punpcklqdq $dst,$dst\n\t" 3423 "vinserti128_high $dst,$dst\t" 3424 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} 3425 ins_encode %{ 3426 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3427 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3428 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3429 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3430 %} 3431 ins_pipe( pipe_slow ); 3432 %} 3433 3434 instruct Repl4I(vecX dst, rRegI src) %{ 3435 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3436 match(Set dst (ReplicateI src)); 3437 format %{ "movd $dst,$src\n\t" 3438 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3439 ins_encode %{ 3440 __ movdl($dst$$XMMRegister, $src$$Register); 3441 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3442 %} 3443 ins_pipe( pipe_slow ); 3444 %} 3445 3446 instruct Repl4I_mem(vecX dst, memory mem) %{ 3447 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3448 match(Set dst (ReplicateI (LoadI mem))); 3449 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3450 ins_encode %{ 3451 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3452 %} 3453 ins_pipe( pipe_slow ); 3454 %} 3455 3456 instruct Repl8I(vecY dst, rRegI src) %{ 3457 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3458 match(Set dst (ReplicateI src)); 3459 format %{ "movd $dst,$src\n\t" 3460 "pshufd $dst,$dst,0x00\n\t" 3461 "vinserti128_high $dst,$dst\t! replicate8I" %} 3462 ins_encode %{ 3463 __ movdl($dst$$XMMRegister, $src$$Register); 3464 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3465 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3466 %} 3467 ins_pipe( pipe_slow ); 3468 %} 3469 3470 instruct Repl8I_mem(vecY dst, memory mem) %{ 3471 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3472 match(Set dst (ReplicateI (LoadI mem))); 3473 format %{ "pshufd $dst,$mem,0x00\n\t" 3474 "vinserti128_high $dst,$dst\t! 
replicate8I" %} 3475 ins_encode %{ 3476 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3477 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3478 %} 3479 ins_pipe( pipe_slow ); 3480 %} 3481 3482 instruct Repl16I(legVecZ dst, rRegI src) %{ 3483 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3484 match(Set dst (ReplicateI src)); 3485 format %{ "movd $dst,$src\n\t" 3486 "pshufd $dst,$dst,0x00\n\t" 3487 "vinserti128_high $dst,$dst\t" 3488 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3489 ins_encode %{ 3490 __ movdl($dst$$XMMRegister, $src$$Register); 3491 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3492 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3493 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3494 %} 3495 ins_pipe( pipe_slow ); 3496 %} 3497 3498 instruct Repl16I_mem(legVecZ dst, memory mem) %{ 3499 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3500 match(Set dst (ReplicateI (LoadI mem))); 3501 format %{ "pshufd $dst,$mem,0x00\n\t" 3502 "vinserti128_high $dst,$dst\t" 3503 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3504 ins_encode %{ 3505 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3506 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3507 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3508 %} 3509 ins_pipe( pipe_slow ); 3510 %} 3511 3512 instruct Repl4I_imm(vecX dst, immI con) %{ 3513 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3514 match(Set dst (ReplicateI con)); 3515 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3516 "punpcklqdq $dst,$dst" %} 3517 ins_encode %{ 3518 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3519 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3520 %} 3521 ins_pipe( pipe_slow ); 3522 %} 3523 3524 instruct Repl8I_imm(vecY dst, immI con) %{ 3525 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3526 match(Set dst (ReplicateI con)); 3527 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3528 "punpcklqdq $dst,$dst\n\t" 3529 "vinserti128_high $dst,$dst" %} 3530 ins_encode %{ 3531 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3532 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3533 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3534 %} 3535 ins_pipe( pipe_slow ); 3536 %} 3537 3538 instruct Repl16I_imm(legVecZ dst, immI con) %{ 3539 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3540 match(Set dst (ReplicateI con)); 3541 format %{ "movq $dst,[$constantaddress]\t" 3542 "punpcklqdq $dst,$dst\n\t" 3543 "vinserti128_high $dst,$dst" 3544 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %} 3545 ins_encode %{ 3546 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3547 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3548 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3549 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3550 %} 3551 ins_pipe( pipe_slow ); 3552 %} 3553 3554 // Long could be loaded into xmm register directly from memory. 3555 instruct Repl2L_mem(vecX dst, memory mem) %{ 3556 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3557 match(Set dst (ReplicateL (LoadL mem))); 3558 format %{ "movq $dst,$mem\n\t" 3559 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3560 ins_encode %{ 3561 __ movq($dst$$XMMRegister, $mem$$Address); 3562 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3563 %} 3564 ins_pipe( pipe_slow ); 3565 %} 3566 3567 // Replicate long (8 byte) scalar to be vector 3568 #ifdef _LP64 3569 instruct Repl4L(vecY dst, rRegL src) %{ 3570 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3571 match(Set dst (ReplicateL src)); 3572 format %{ "movdq $dst,$src\n\t" 3573 "punpcklqdq $dst,$dst\n\t" 3574 "vinserti128_high $dst,$dst\t! replicate4L" %} 3575 ins_encode %{ 3576 __ movdq($dst$$XMMRegister, $src$$Register); 3577 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3578 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3579 %} 3580 ins_pipe( pipe_slow ); 3581 %} 3582 3583 instruct Repl8L(legVecZ dst, rRegL src) %{ 3584 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3585 match(Set dst (ReplicateL src)); 3586 format %{ "movdq $dst,$src\n\t" 3587 "punpcklqdq $dst,$dst\n\t" 3588 "vinserti128_high $dst,$dst\t" 3589 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3590 ins_encode %{ 3591 __ movdq($dst$$XMMRegister, $src$$Register); 3592 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3593 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3594 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3595 %} 3596 ins_pipe( pipe_slow ); 3597 %} 3598 #else // _LP64 3599 instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{ 3600 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3601 match(Set dst (ReplicateL src)); 3602 effect(TEMP dst, USE src, TEMP tmp); 3603 format %{ "movdl $dst,$src.lo\n\t" 3604 "movdl $tmp,$src.hi\n\t" 3605 "punpckldq $dst,$tmp\n\t" 3606 "punpcklqdq $dst,$dst\n\t" 3607 "vinserti128_high $dst,$dst\t! replicate4L" %} 3608 ins_encode %{ 3609 __ movdl($dst$$XMMRegister, $src$$Register); 3610 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3611 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3612 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3613 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3614 %} 3615 ins_pipe( pipe_slow ); 3616 %} 3617 3618 instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{ 3619 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3620 match(Set dst (ReplicateL src)); 3621 effect(TEMP dst, USE src, TEMP tmp); 3622 format %{ "movdl $dst,$src.lo\n\t" 3623 "movdl $tmp,$src.hi\n\t" 3624 "punpckldq $dst,$tmp\n\t" 3625 "punpcklqdq $dst,$dst\n\t" 3626 "vinserti128_high $dst,$dst\t" 3627 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3628 ins_encode %{ 3629 __ movdl($dst$$XMMRegister, $src$$Register); 3630 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3631 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3632 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3633 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3634 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3635 %} 3636 ins_pipe( pipe_slow ); 3637 %} 3638 #endif // _LP64 3639 3640 instruct Repl4L_imm(vecY dst, immL con) %{ 3641 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3642 match(Set dst (ReplicateL con)); 3643 format %{ "movq $dst,[$constantaddress]\n\t" 3644 "punpcklqdq $dst,$dst\n\t" 3645 "vinserti128_high $dst,$dst\t! 
replicate4L($con)" %} 3646 ins_encode %{ 3647 __ movq($dst$$XMMRegister, $constantaddress($con)); 3648 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3649 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3650 %} 3651 ins_pipe( pipe_slow ); 3652 %} 3653 3654 instruct Repl8L_imm(legVecZ dst, immL con) %{ 3655 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3656 match(Set dst (ReplicateL con)); 3657 format %{ "movq $dst,[$constantaddress]\n\t" 3658 "punpcklqdq $dst,$dst\n\t" 3659 "vinserti128_high $dst,$dst\t" 3660 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %} 3661 ins_encode %{ 3662 __ movq($dst$$XMMRegister, $constantaddress($con)); 3663 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3664 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3665 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3666 %} 3667 ins_pipe( pipe_slow ); 3668 %} 3669 3670 instruct Repl4L_mem(vecY dst, memory mem) %{ 3671 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3672 match(Set dst (ReplicateL (LoadL mem))); 3673 format %{ "movq $dst,$mem\n\t" 3674 "punpcklqdq $dst,$dst\n\t" 3675 "vinserti128_high $dst,$dst\t! replicate4L" %} 3676 ins_encode %{ 3677 __ movq($dst$$XMMRegister, $mem$$Address); 3678 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3679 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3680 %} 3681 ins_pipe( pipe_slow ); 3682 %} 3683 3684 instruct Repl8L_mem(legVecZ dst, memory mem) %{ 3685 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3686 match(Set dst (ReplicateL (LoadL mem))); 3687 format %{ "movq $dst,$mem\n\t" 3688 "punpcklqdq $dst,$dst\n\t" 3689 "vinserti128_high $dst,$dst\t" 3690 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3691 ins_encode %{ 3692 __ movq($dst$$XMMRegister, $mem$$Address); 3693 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3694 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3695 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3696 %} 3697 ins_pipe( pipe_slow ); 3698 %} 3699 3700 instruct Repl2F_mem(vecD dst, memory mem) %{ 3701 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3702 match(Set dst (ReplicateF (LoadF mem))); 3703 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3704 ins_encode %{ 3705 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3706 %} 3707 ins_pipe( pipe_slow ); 3708 %} 3709 3710 instruct Repl4F_mem(vecX dst, memory mem) %{ 3711 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3712 match(Set dst (ReplicateF (LoadF mem))); 3713 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3714 ins_encode %{ 3715 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3716 %} 3717 ins_pipe( pipe_slow ); 3718 %} 3719 3720 instruct Repl8F(vecY dst, vlRegF src) %{ 3721 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3722 match(Set dst (ReplicateF src)); 3723 format %{ "pshufd $dst,$src,0x00\n\t" 3724 "vinsertf128_high $dst,$dst\t! 
replicate8F" %} 3725 ins_encode %{ 3726 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3727 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3728 %} 3729 ins_pipe( pipe_slow ); 3730 %} 3731 3732 instruct Repl8F_mem(vecY dst, memory mem) %{ 3733 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3734 match(Set dst (ReplicateF (LoadF mem))); 3735 format %{ "pshufd $dst,$mem,0x00\n\t" 3736 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3737 ins_encode %{ 3738 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3739 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3740 %} 3741 ins_pipe( pipe_slow ); 3742 %} 3743 3744 instruct Repl16F(legVecZ dst, vlRegF src) %{ 3745 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3746 match(Set dst (ReplicateF src)); 3747 format %{ "pshufd $dst,$src,0x00\n\t" 3748 "vinsertf128_high $dst,$dst\t" 3749 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3750 ins_encode %{ 3751 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3752 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3753 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3754 %} 3755 ins_pipe( pipe_slow ); 3756 %} 3757 3758 instruct Repl16F_mem(legVecZ dst, memory mem) %{ 3759 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3760 match(Set dst (ReplicateF (LoadF mem))); 3761 format %{ "pshufd $dst,$mem,0x00\n\t" 3762 "vinsertf128_high $dst,$dst\t" 3763 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3764 ins_encode %{ 3765 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3766 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3767 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3768 %} 3769 ins_pipe( pipe_slow ); 3770 %} 3771 3772 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3773 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3774 match(Set dst (ReplicateF zero)); 3775 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3776 ins_encode %{ 3777 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3778 %} 3779 ins_pipe( fpu_reg_reg ); 3780 %} 3781 3782 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3783 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3784 match(Set dst (ReplicateF zero)); 3785 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3786 ins_encode %{ 3787 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3788 %} 3789 ins_pipe( fpu_reg_reg ); 3790 %} 3791 3792 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3793 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3794 match(Set dst (ReplicateF zero)); 3795 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3796 ins_encode %{ 3797 int vector_len = 1; 3798 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3799 %} 3800 ins_pipe( fpu_reg_reg ); 3801 %} 3802 3803 instruct Repl2D_mem(vecX dst, memory mem) %{ 3804 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3805 match(Set dst (ReplicateD (LoadD mem))); 3806 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3807 ins_encode %{ 3808 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3809 %} 3810 ins_pipe( pipe_slow ); 3811 %} 3812 3813 instruct Repl4D(vecY dst, vlRegD src) %{ 3814 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3815 match(Set dst (ReplicateD src)); 3816 format %{ "pshufd $dst,$src,0x44\n\t" 3817 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3818 ins_encode %{ 3819 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3820 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3821 %} 3822 ins_pipe( pipe_slow ); 3823 %} 3824 3825 instruct Repl4D_mem(vecY dst, memory mem) %{ 3826 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3827 match(Set dst (ReplicateD (LoadD mem))); 3828 format %{ "pshufd $dst,$mem,0x44\n\t" 3829 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3830 ins_encode %{ 3831 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3832 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3833 %} 3834 ins_pipe( pipe_slow ); 3835 %} 3836 3837 instruct Repl8D(legVecZ dst, vlRegD src) %{ 3838 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3839 match(Set dst (ReplicateD src)); 3840 format %{ "pshufd $dst,$src,0x44\n\t" 3841 "vinsertf128_high $dst,$dst\t" 3842 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3843 ins_encode %{ 3844 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3845 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3846 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3847 %} 3848 ins_pipe( pipe_slow ); 3849 %} 3850 3851 instruct Repl8D_mem(legVecZ dst, memory mem) %{ 3852 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3853 match(Set dst (ReplicateD (LoadD mem))); 3854 format %{ "pshufd $dst,$mem,0x44\n\t" 3855 "vinsertf128_high $dst,$dst\t" 3856 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3857 ins_encode %{ 3858 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3859 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3860 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3861 %} 3862 ins_pipe( pipe_slow ); 3863 %} 3864 3865 // Replicate double (8 byte) scalar zero to be vector 3866 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3867 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3868 match(Set dst (ReplicateD zero)); 3869 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3870 ins_encode %{ 3871 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3872 %} 3873 ins_pipe( fpu_reg_reg ); 3874 %} 3875 3876 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3877 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3878 match(Set dst (ReplicateD zero)); 3879 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3880 ins_encode %{ 3881 int vector_len = 1; 3882 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3883 %} 3884 ins_pipe( fpu_reg_reg ); 3885 %} 3886 3887 // ====================GENERIC REPLICATE========================================== 3888 3889 // Replicate byte scalar to be vector 3890 instruct Repl4B(vecS dst, rRegI src) %{ 3891 predicate(n->as_Vector()->length() == 4); 3892 match(Set dst (ReplicateB src)); 3893 format %{ "movd $dst,$src\n\t" 3894 "punpcklbw $dst,$dst\n\t" 3895 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3896 ins_encode %{ 3897 __ movdl($dst$$XMMRegister, $src$$Register); 3898 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3899 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3900 %} 3901 ins_pipe( pipe_slow ); 3902 %} 3903 3904 instruct Repl8B(vecD dst, rRegI src) %{ 3905 predicate(n->as_Vector()->length() == 8); 3906 match(Set dst (ReplicateB src)); 3907 format %{ "movd $dst,$src\n\t" 3908 "punpcklbw $dst,$dst\n\t" 3909 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3910 ins_encode %{ 3911 __ movdl($dst$$XMMRegister, $src$$Register); 3912 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3913 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3914 %} 3915 ins_pipe( pipe_slow ); 3916 %} 3917 3918 // Replicate byte scalar immediate to be vector by loading from const table. 3919 instruct Repl4B_imm(vecS dst, immI con) %{ 3920 predicate(n->as_Vector()->length() == 4); 3921 match(Set dst (ReplicateB con)); 3922 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3923 ins_encode %{ 3924 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3925 %} 3926 ins_pipe( pipe_slow ); 3927 %} 3928 3929 instruct Repl8B_imm(vecD dst, immI con) %{ 3930 predicate(n->as_Vector()->length() == 8); 3931 match(Set dst (ReplicateB con)); 3932 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3933 ins_encode %{ 3934 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3935 %} 3936 ins_pipe( pipe_slow ); 3937 %} 3938 3939 // Replicate byte scalar zero to be vector 3940 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3941 predicate(n->as_Vector()->length() == 4); 3942 match(Set dst (ReplicateB zero)); 3943 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3944 ins_encode %{ 3945 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3946 %} 3947 ins_pipe( fpu_reg_reg ); 3948 %} 3949 3950 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3951 predicate(n->as_Vector()->length() == 8); 3952 match(Set dst (ReplicateB zero)); 3953 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3954 ins_encode %{ 3955 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3956 %} 3957 ins_pipe( fpu_reg_reg ); 3958 %} 3959 3960 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3961 predicate(n->as_Vector()->length() == 16); 3962 match(Set dst (ReplicateB zero)); 3963 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3964 ins_encode %{ 3965 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3966 %} 3967 ins_pipe( fpu_reg_reg ); 3968 %} 3969 3970 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3971 predicate(n->as_Vector()->length() == 32); 3972 match(Set dst (ReplicateB zero)); 3973 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3974 ins_encode %{ 3975 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3976 int vector_len = 1; 3977 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3978 %} 3979 ins_pipe( fpu_reg_reg ); 3980 %} 3981 3982 // Replicate char/short (2 byte) scalar to be vector 3983 instruct Repl2S(vecS dst, rRegI src) %{ 3984 predicate(n->as_Vector()->length() == 2); 3985 match(Set dst (ReplicateS src)); 3986 format %{ "movd $dst,$src\n\t" 3987 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3988 ins_encode %{ 3989 __ movdl($dst$$XMMRegister, $src$$Register); 3990 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3991 %} 3992 ins_pipe( fpu_reg_reg ); 3993 %} 3994 3995 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3996 instruct Repl2S_imm(vecS dst, immI con) %{ 3997 predicate(n->as_Vector()->length() == 2); 3998 match(Set dst (ReplicateS con)); 3999 format %{ "movdl $dst,[$constantaddress]\t! 
replicate2S($con)" %} 4000 ins_encode %{ 4001 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 4002 %} 4003 ins_pipe( fpu_reg_reg ); 4004 %} 4005 4006 instruct Repl4S_imm(vecD dst, immI con) %{ 4007 predicate(n->as_Vector()->length() == 4); 4008 match(Set dst (ReplicateS con)); 4009 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 4010 ins_encode %{ 4011 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4012 %} 4013 ins_pipe( fpu_reg_reg ); 4014 %} 4015 4016 // Replicate char/short (2 byte) scalar zero to be vector 4017 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 4018 predicate(n->as_Vector()->length() == 2); 4019 match(Set dst (ReplicateS zero)); 4020 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 4021 ins_encode %{ 4022 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4023 %} 4024 ins_pipe( fpu_reg_reg ); 4025 %} 4026 4027 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 4028 predicate(n->as_Vector()->length() == 4); 4029 match(Set dst (ReplicateS zero)); 4030 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 4031 ins_encode %{ 4032 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4033 %} 4034 ins_pipe( fpu_reg_reg ); 4035 %} 4036 4037 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 4038 predicate(n->as_Vector()->length() == 8); 4039 match(Set dst (ReplicateS zero)); 4040 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 4041 ins_encode %{ 4042 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4043 %} 4044 ins_pipe( fpu_reg_reg ); 4045 %} 4046 4047 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 4048 predicate(n->as_Vector()->length() == 16); 4049 match(Set dst (ReplicateS zero)); 4050 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 4051 ins_encode %{ 4052 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4053 int vector_len = 1; 4054 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4055 %} 4056 ins_pipe( fpu_reg_reg ); 4057 %} 4058 4059 // Replicate integer (4 byte) scalar to be vector 4060 instruct Repl2I(vecD dst, rRegI src) %{ 4061 predicate(n->as_Vector()->length() == 2); 4062 match(Set dst (ReplicateI src)); 4063 format %{ "movd $dst,$src\n\t" 4064 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4065 ins_encode %{ 4066 __ movdl($dst$$XMMRegister, $src$$Register); 4067 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4068 %} 4069 ins_pipe( fpu_reg_reg ); 4070 %} 4071 4072 // Integer could be loaded into xmm register directly from memory. 4073 instruct Repl2I_mem(vecD dst, memory mem) %{ 4074 predicate(n->as_Vector()->length() == 2); 4075 match(Set dst (ReplicateI (LoadI mem))); 4076 format %{ "movd $dst,$mem\n\t" 4077 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4078 ins_encode %{ 4079 __ movdl($dst$$XMMRegister, $mem$$Address); 4080 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4081 %} 4082 ins_pipe( fpu_reg_reg ); 4083 %} 4084 4085 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 4086 instruct Repl2I_imm(vecD dst, immI con) %{ 4087 predicate(n->as_Vector()->length() == 2); 4088 match(Set dst (ReplicateI con)); 4089 format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %} 4090 ins_encode %{ 4091 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4092 %} 4093 ins_pipe( fpu_reg_reg ); 4094 %} 4095 4096 // Replicate integer (4 byte) scalar zero to be vector 4097 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 4098 predicate(n->as_Vector()->length() == 2); 4099 match(Set dst (ReplicateI zero)); 4100 format %{ "pxor $dst,$dst\t! replicate2I" %} 4101 ins_encode %{ 4102 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4103 %} 4104 ins_pipe( fpu_reg_reg ); 4105 %} 4106 4107 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 4108 predicate(n->as_Vector()->length() == 4); 4109 match(Set dst (ReplicateI zero)); 4110 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 4111 ins_encode %{ 4112 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4113 %} 4114 ins_pipe( fpu_reg_reg ); 4115 %} 4116 4117 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 4118 predicate(n->as_Vector()->length() == 8); 4119 match(Set dst (ReplicateI zero)); 4120 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 4121 ins_encode %{ 4122 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4123 int vector_len = 1; 4124 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4125 %} 4126 ins_pipe( fpu_reg_reg ); 4127 %} 4128 4129 // Replicate long (8 byte) scalar to be vector 4130 #ifdef _LP64 4131 instruct Repl2L(vecX dst, rRegL src) %{ 4132 predicate(n->as_Vector()->length() == 2); 4133 match(Set dst (ReplicateL src)); 4134 format %{ "movdq $dst,$src\n\t" 4135 "punpcklqdq $dst,$dst\t! replicate2L" %} 4136 ins_encode %{ 4137 __ movdq($dst$$XMMRegister, $src$$Register); 4138 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4139 %} 4140 ins_pipe( pipe_slow ); 4141 %} 4142 #else // _LP64 4143 instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{ 4144 predicate(n->as_Vector()->length() == 2); 4145 match(Set dst (ReplicateL src)); 4146 effect(TEMP dst, USE src, TEMP tmp); 4147 format %{ "movdl $dst,$src.lo\n\t" 4148 "movdl $tmp,$src.hi\n\t" 4149 "punpckldq $dst,$tmp\n\t" 4150 "punpcklqdq $dst,$dst\t! replicate2L"%} 4151 ins_encode %{ 4152 __ movdl($dst$$XMMRegister, $src$$Register); 4153 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4154 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4155 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4156 %} 4157 ins_pipe( pipe_slow ); 4158 %} 4159 #endif // _LP64 4160 4161 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4162 instruct Repl2L_imm(vecX dst, immL con) %{ 4163 predicate(n->as_Vector()->length() == 2); 4164 match(Set dst (ReplicateL con)); 4165 format %{ "movq $dst,[$constantaddress]\n\t" 4166 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 4167 ins_encode %{ 4168 __ movq($dst$$XMMRegister, $constantaddress($con)); 4169 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4170 %} 4171 ins_pipe( pipe_slow ); 4172 %} 4173 4174 // Replicate long (8 byte) scalar zero to be vector 4175 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 4176 predicate(n->as_Vector()->length() == 2); 4177 match(Set dst (ReplicateL zero)); 4178 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 4179 ins_encode %{ 4180 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4181 %} 4182 ins_pipe( fpu_reg_reg ); 4183 %} 4184 4185 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 4186 predicate(n->as_Vector()->length() == 4); 4187 match(Set dst (ReplicateL zero)); 4188 format %{ "vpxor $dst,$dst,$dst\t! 
replicate4L zero" %} 4189 ins_encode %{ 4190 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4191 int vector_len = 1; 4192 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4193 %} 4194 ins_pipe( fpu_reg_reg ); 4195 %} 4196 4197 // Replicate float (4 byte) scalar to be vector 4198 instruct Repl2F(vecD dst, vlRegF src) %{ 4199 predicate(n->as_Vector()->length() == 2); 4200 match(Set dst (ReplicateF src)); 4201 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 4202 ins_encode %{ 4203 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4204 %} 4205 ins_pipe( fpu_reg_reg ); 4206 %} 4207 4208 instruct Repl4F(vecX dst, vlRegF src) %{ 4209 predicate(n->as_Vector()->length() == 4); 4210 match(Set dst (ReplicateF src)); 4211 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 4212 ins_encode %{ 4213 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4214 %} 4215 ins_pipe( pipe_slow ); 4216 %} 4217 4218 // Replicate double (8 bytes) scalar to be vector 4219 instruct Repl2D(vecX dst, vlRegD src) %{ 4220 predicate(n->as_Vector()->length() == 2); 4221 match(Set dst (ReplicateD src)); 4222 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 4223 ins_encode %{ 4224 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4225 %} 4226 ins_pipe( pipe_slow ); 4227 %} 4228 4229 // ====================EVEX REPLICATE============================================= 4230 4231 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4232 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4233 match(Set dst (ReplicateB (LoadB mem))); 4234 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4235 ins_encode %{ 4236 int vector_len = 0; 4237 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4238 %} 4239 ins_pipe( pipe_slow ); 4240 %} 4241 4242 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4243 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4244 match(Set dst (ReplicateB (LoadB mem))); 4245 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 4246 ins_encode %{ 4247 int vector_len = 0; 4248 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4249 %} 4250 ins_pipe( pipe_slow ); 4251 %} 4252 4253 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4254 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4255 match(Set dst (ReplicateB src)); 4256 format %{ "evpbroadcastb $dst,$src\t! replicate16B" %} 4257 ins_encode %{ 4258 int vector_len = 0; 4259 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4260 %} 4261 ins_pipe( pipe_slow ); 4262 %} 4263 4264 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4265 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4266 match(Set dst (ReplicateB (LoadB mem))); 4267 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4268 ins_encode %{ 4269 int vector_len = 0; 4270 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4271 %} 4272 ins_pipe( pipe_slow ); 4273 %} 4274 4275 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4276 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4277 match(Set dst (ReplicateB src)); 4278 format %{ "evpbroadcastb $dst,$src\t! 
replicate32B" %} 4279 ins_encode %{ 4280 int vector_len = 1; 4281 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4282 %} 4283 ins_pipe( pipe_slow ); 4284 %} 4285 4286 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4287 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4288 match(Set dst (ReplicateB (LoadB mem))); 4289 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4290 ins_encode %{ 4291 int vector_len = 1; 4292 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4293 %} 4294 ins_pipe( pipe_slow ); 4295 %} 4296 4297 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4298 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4299 match(Set dst (ReplicateB src)); 4300 format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %} 4301 ins_encode %{ 4302 int vector_len = 2; 4303 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4304 %} 4305 ins_pipe( pipe_slow ); 4306 %} 4307 4308 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4309 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4310 match(Set dst (ReplicateB (LoadB mem))); 4311 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4312 ins_encode %{ 4313 int vector_len = 2; 4314 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4315 %} 4316 ins_pipe( pipe_slow ); 4317 %} 4318 4319 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4320 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4321 match(Set dst (ReplicateB con)); 4322 format %{ "movq $dst,[$constantaddress]\n\t" 4323 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4324 ins_encode %{ 4325 int vector_len = 0; 4326 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4327 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4328 %} 4329 ins_pipe( pipe_slow ); 4330 %} 4331 4332 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4333 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4334 match(Set dst (ReplicateB con)); 4335 format %{ "movq $dst,[$constantaddress]\n\t" 4336 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4337 ins_encode %{ 4338 int vector_len = 1; 4339 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4340 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4341 %} 4342 ins_pipe( pipe_slow ); 4343 %} 4344 4345 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4346 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4347 match(Set dst (ReplicateB con)); 4348 format %{ "movq $dst,[$constantaddress]\n\t" 4349 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4350 ins_encode %{ 4351 int vector_len = 2; 4352 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4353 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4354 %} 4355 ins_pipe( pipe_slow ); 4356 %} 4357 4358 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4359 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4360 match(Set dst (ReplicateB zero)); 4361 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4362 ins_encode %{ 4363 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // 512-bit vpxor requires EVEX (vector_len = 2 selects the 512-bit encoding).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // 512-bit vpxor requires EVEX (vector_len = 2 selects the 512-bit encoding).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
replicate8I" %} 4535 ins_encode %{ 4536 int vector_len = 1; 4537 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4538 %} 4539 ins_pipe( pipe_slow ); 4540 %} 4541 4542 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4543 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4544 match(Set dst (ReplicateI (LoadI mem))); 4545 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4546 ins_encode %{ 4547 int vector_len = 1; 4548 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4549 %} 4550 ins_pipe( pipe_slow ); 4551 %} 4552 4553 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4554 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4555 match(Set dst (ReplicateI src)); 4556 format %{ "evpbroadcastd $dst,$src\t! replicate16I" %} 4557 ins_encode %{ 4558 int vector_len = 2; 4559 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4560 %} 4561 ins_pipe( pipe_slow ); 4562 %} 4563 4564 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4565 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4566 match(Set dst (ReplicateI (LoadI mem))); 4567 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4568 ins_encode %{ 4569 int vector_len = 2; 4570 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4571 %} 4572 ins_pipe( pipe_slow ); 4573 %} 4574 4575 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4576 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4577 match(Set dst (ReplicateI con)); 4578 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4579 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4580 ins_encode %{ 4581 int vector_len = 0; 4582 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4583 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4584 %} 4585 ins_pipe( pipe_slow ); 4586 %} 4587 4588 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4589 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4590 match(Set dst (ReplicateI con)); 4591 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4592 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4593 ins_encode %{ 4594 int vector_len = 1; 4595 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4596 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4597 %} 4598 ins_pipe( pipe_slow ); 4599 %} 4600 4601 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4602 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4603 match(Set dst (ReplicateI con)); 4604 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4605 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4606 ins_encode %{ 4607 int vector_len = 2; 4608 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4609 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4610 %} 4611 ins_pipe( pipe_slow ); 4612 %} 4613 4614 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4615 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4616 match(Set dst (ReplicateI zero)); 4617 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4618 ins_encode %{ 4619 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 
4620 int vector_len = 2; 4621 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4622 %} 4623 ins_pipe( fpu_reg_reg ); 4624 %} 4625 4626 // Replicate long (8 byte) scalar to be vector 4627 #ifdef _LP64 4628 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4629 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4630 match(Set dst (ReplicateL src)); 4631 format %{ "evpbroadcastq $dst,$src\t! replicate4L" %} 4632 ins_encode %{ 4633 int vector_len = 1; 4634 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4635 %} 4636 ins_pipe( pipe_slow ); 4637 %} 4638 4639 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4640 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4641 match(Set dst (ReplicateL src)); 4642 format %{ "evpbroadcastq $dst,$src\t! replicate8L" %} 4643 ins_encode %{ 4644 int vector_len = 2; 4645 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4646 %} 4647 ins_pipe( pipe_slow ); 4648 %} 4649 #else // _LP64 4650 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4651 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4652 match(Set dst (ReplicateL src)); 4653 effect(TEMP dst, USE src, TEMP tmp); 4654 format %{ "movdl $dst,$src.lo\n\t" 4655 "movdl $tmp,$src.hi\n\t" 4656 "punpckldq $dst,$tmp\n\t" 4657 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4658 ins_encode %{ 4659 int vector_len = 1; 4660 __ movdl($dst$$XMMRegister, $src$$Register); 4661 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4662 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4663 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4664 %} 4665 ins_pipe( pipe_slow ); 4666 %} 4667 4668 instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{ 4669 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4670 match(Set dst (ReplicateL src)); 4671 effect(TEMP dst, USE src, TEMP tmp); 4672 format %{ "movdl $dst,$src.lo\n\t" 4673 "movdl $tmp,$src.hi\n\t" 4674 "punpckldq $dst,$tmp\n\t" 4675 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4676 ins_encode %{ 4677 int vector_len = 2; 4678 __ movdl($dst$$XMMRegister, $src$$Register); 4679 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4680 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4681 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4682 %} 4683 ins_pipe( pipe_slow ); 4684 %} 4685 #endif // _LP64 4686 4687 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4688 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4689 match(Set dst (ReplicateL con)); 4690 format %{ "movq $dst,[$constantaddress]\n\t" 4691 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4692 ins_encode %{ 4693 int vector_len = 1; 4694 __ movq($dst$$XMMRegister, $constantaddress($con)); 4695 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4696 %} 4697 ins_pipe( pipe_slow ); 4698 %} 4699 4700 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4701 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4702 match(Set dst (ReplicateL con)); 4703 format %{ "movq $dst,[$constantaddress]\n\t" 4704 "vpbroadcastq $dst,$dst\t! 
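
// Editor's sketch of the 32-bit (#else) path above: without 64-bit GPRs the
// long value arrives as two 32-bit halves, which movdl/punpckldq assemble
// into the low quadword of an XMM register before the quadword broadcast.
static unsigned long long combine_halves(unsigned int lo, unsigned int hi) {
  // After movdl dst,lo; movdl tmp,hi; punpckldq dst,tmp, the low quadword
  // of dst holds exactly this value.
  return ((unsigned long long)hi << 32) | lo;
}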
replicate8L" %} 4705 ins_encode %{ 4706 int vector_len = 2; 4707 __ movq($dst$$XMMRegister, $constantaddress($con)); 4708 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4709 %} 4710 ins_pipe( pipe_slow ); 4711 %} 4712 4713 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4714 predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4715 match(Set dst (ReplicateL (LoadL mem))); 4716 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4717 ins_encode %{ 4718 int vector_len = 0; 4719 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4720 %} 4721 ins_pipe( pipe_slow ); 4722 %} 4723 4724 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4725 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4726 match(Set dst (ReplicateL (LoadL mem))); 4727 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4728 ins_encode %{ 4729 int vector_len = 1; 4730 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4731 %} 4732 ins_pipe( pipe_slow ); 4733 %} 4734 4735 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4736 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4737 match(Set dst (ReplicateL (LoadL mem))); 4738 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4739 ins_encode %{ 4740 int vector_len = 2; 4741 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4742 %} 4743 ins_pipe( pipe_slow ); 4744 %} 4745 4746 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4747 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4748 match(Set dst (ReplicateL zero)); 4749 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4750 ins_encode %{ 4751 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4752 int vector_len = 2; 4753 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4754 %} 4755 ins_pipe( fpu_reg_reg ); 4756 %} 4757 4758 instruct Repl8F_evex(vecY dst, regF src) %{ 4759 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4760 match(Set dst (ReplicateF src)); 4761 format %{ "vpbroadcastss $dst,$src\t! replicate8F" %} 4762 ins_encode %{ 4763 int vector_len = 1; 4764 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4765 %} 4766 ins_pipe( pipe_slow ); 4767 %} 4768 4769 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4770 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4771 match(Set dst (ReplicateF (LoadF mem))); 4772 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4773 ins_encode %{ 4774 int vector_len = 1; 4775 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4776 %} 4777 ins_pipe( pipe_slow ); 4778 %} 4779 4780 instruct Repl16F_evex(vecZ dst, regF src) %{ 4781 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4782 match(Set dst (ReplicateF src)); 4783 format %{ "vpbroadcastss $dst,$src\t! replicate16F" %} 4784 ins_encode %{ 4785 int vector_len = 2; 4786 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4787 %} 4788 ins_pipe( pipe_slow ); 4789 %} 4790 4791 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4792 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4793 match(Set dst (ReplicateF (LoadF mem))); 4794 format %{ "vbroadcastss $dst,$mem\t! 
replicate16F" %} 4795 ins_encode %{ 4796 int vector_len = 2; 4797 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4798 %} 4799 ins_pipe( pipe_slow ); 4800 %} 4801 4802 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4803 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4804 match(Set dst (ReplicateF zero)); 4805 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4806 ins_encode %{ 4807 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4808 int vector_len = 2; 4809 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4810 %} 4811 ins_pipe( fpu_reg_reg ); 4812 %} 4813 4814 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4815 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4816 match(Set dst (ReplicateF zero)); 4817 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4818 ins_encode %{ 4819 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4820 int vector_len = 2; 4821 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4822 %} 4823 ins_pipe( fpu_reg_reg ); 4824 %} 4825 4826 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4827 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4828 match(Set dst (ReplicateF zero)); 4829 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4830 ins_encode %{ 4831 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4832 int vector_len = 2; 4833 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4834 %} 4835 ins_pipe( fpu_reg_reg ); 4836 %} 4837 4838 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4839 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4840 match(Set dst (ReplicateF zero)); 4841 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4842 ins_encode %{ 4843 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4844 int vector_len = 2; 4845 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4846 %} 4847 ins_pipe( fpu_reg_reg ); 4848 %} 4849 4850 instruct Repl4D_evex(vecY dst, regD src) %{ 4851 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4852 match(Set dst (ReplicateD src)); 4853 format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %} 4854 ins_encode %{ 4855 int vector_len = 1; 4856 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4857 %} 4858 ins_pipe( pipe_slow ); 4859 %} 4860 4861 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4862 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4863 match(Set dst (ReplicateD (LoadD mem))); 4864 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4865 ins_encode %{ 4866 int vector_len = 1; 4867 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4868 %} 4869 ins_pipe( pipe_slow ); 4870 %} 4871 4872 instruct Repl8D_evex(vecZ dst, regD src) %{ 4873 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4874 match(Set dst (ReplicateD src)); 4875 format %{ "vpbroadcastsd $dst,$src\t! 
replicate8D" %} 4876 ins_encode %{ 4877 int vector_len = 2; 4878 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4879 %} 4880 ins_pipe( pipe_slow ); 4881 %} 4882 4883 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4884 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4885 match(Set dst (ReplicateD (LoadD mem))); 4886 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4887 ins_encode %{ 4888 int vector_len = 2; 4889 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4890 %} 4891 ins_pipe( pipe_slow ); 4892 %} 4893 4894 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4895 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4896 match(Set dst (ReplicateD zero)); 4897 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4898 ins_encode %{ 4899 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4900 int vector_len = 2; 4901 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4902 %} 4903 ins_pipe( fpu_reg_reg ); 4904 %} 4905 4906 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4907 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4908 match(Set dst (ReplicateD zero)); 4909 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4910 ins_encode %{ 4911 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4912 int vector_len = 2; 4913 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4914 %} 4915 ins_pipe( fpu_reg_reg ); 4916 %} 4917 4918 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4919 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4920 match(Set dst (ReplicateD zero)); 4921 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4922 ins_encode %{ 4923 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4924 int vector_len = 2; 4925 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4926 %} 4927 ins_pipe( fpu_reg_reg ); 4928 %} 4929 4930 // ====================REDUCTION ARITHMETIC======================================= 4931 4932 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4933 predicate(UseSSE > 2 && UseAVX == 0); 4934 match(Set dst (AddReductionVI src1 src2)); 4935 effect(TEMP tmp2, TEMP tmp); 4936 format %{ "movdqu $tmp2,$src2\n\t" 4937 "phaddd $tmp2,$tmp2\n\t" 4938 "movd $tmp,$src1\n\t" 4939 "paddd $tmp,$tmp2\n\t" 4940 "movd $dst,$tmp\t! add reduction2I" %} 4941 ins_encode %{ 4942 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4943 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4944 __ movdl($tmp$$XMMRegister, $src1$$Register); 4945 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4946 __ movdl($dst$$Register, $tmp$$XMMRegister); 4947 %} 4948 ins_pipe( pipe_slow ); 4949 %} 4950 4951 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4952 predicate(VM_Version::supports_avxonly()); 4953 match(Set dst (AddReductionVI src1 src2)); 4954 effect(TEMP tmp, TEMP tmp2); 4955 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4956 "movd $tmp2,$src1\n\t" 4957 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4958 "movd $dst,$tmp2\t! 
add reduction2I" %} 4959 ins_encode %{ 4960 int vector_len = 0; 4961 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4962 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4963 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4964 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4965 %} 4966 ins_pipe( pipe_slow ); 4967 %} 4968 4969 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4970 predicate(UseAVX > 2); 4971 match(Set dst (AddReductionVI src1 src2)); 4972 effect(TEMP tmp, TEMP tmp2); 4973 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4974 "vpaddd $tmp,$src2,$tmp2\n\t" 4975 "movd $tmp2,$src1\n\t" 4976 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4977 "movd $dst,$tmp2\t! add reduction2I" %} 4978 ins_encode %{ 4979 int vector_len = 0; 4980 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4981 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4982 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4983 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4984 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4985 %} 4986 ins_pipe( pipe_slow ); 4987 %} 4988 4989 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 4990 predicate(UseSSE > 2 && UseAVX == 0); 4991 match(Set dst (AddReductionVI src1 src2)); 4992 effect(TEMP tmp, TEMP tmp2); 4993 format %{ "movdqu $tmp,$src2\n\t" 4994 "phaddd $tmp,$tmp\n\t" 4995 "phaddd $tmp,$tmp\n\t" 4996 "movd $tmp2,$src1\n\t" 4997 "paddd $tmp2,$tmp\n\t" 4998 "movd $dst,$tmp2\t! add reduction4I" %} 4999 ins_encode %{ 5000 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5001 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5002 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5003 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5004 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 5005 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5006 %} 5007 ins_pipe( pipe_slow ); 5008 %} 5009 5010 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5011 predicate(VM_Version::supports_avxonly()); 5012 match(Set dst (AddReductionVI src1 src2)); 5013 effect(TEMP tmp, TEMP tmp2); 5014 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5015 "vphaddd $tmp,$tmp,$tmp\n\t" 5016 "movd $tmp2,$src1\n\t" 5017 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5018 "movd $dst,$tmp2\t! add reduction4I" %} 5019 ins_encode %{ 5020 int vector_len = 0; 5021 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5022 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5023 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5024 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 5025 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5026 %} 5027 ins_pipe( pipe_slow ); 5028 %} 5029 5030 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5031 predicate(UseAVX > 2); 5032 match(Set dst (AddReductionVI src1 src2)); 5033 effect(TEMP tmp, TEMP tmp2); 5034 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5035 "vpaddd $tmp,$src2,$tmp2\n\t" 5036 "pshufd $tmp2,$tmp,0x1\n\t" 5037 "vpaddd $tmp,$tmp,$tmp2\n\t" 5038 "movd $tmp2,$src1\n\t" 5039 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5040 "movd $dst,$tmp2\t! 
add reduction4I" %} 5041 ins_encode %{ 5042 int vector_len = 0; 5043 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5044 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5045 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5046 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5047 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5048 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5049 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5050 %} 5051 ins_pipe( pipe_slow ); 5052 %} 5053 5054 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5055 predicate(VM_Version::supports_avxonly()); 5056 match(Set dst (AddReductionVI src1 src2)); 5057 effect(TEMP tmp, TEMP tmp2); 5058 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5059 "vphaddd $tmp,$tmp,$tmp2\n\t" 5060 "vextracti128_high $tmp2,$tmp\n\t" 5061 "vpaddd $tmp,$tmp,$tmp2\n\t" 5062 "movd $tmp2,$src1\n\t" 5063 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5064 "movd $dst,$tmp2\t! add reduction8I" %} 5065 ins_encode %{ 5066 int vector_len = 1; 5067 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5068 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5069 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 5070 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5071 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5072 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5073 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5074 %} 5075 ins_pipe( pipe_slow ); 5076 %} 5077 5078 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5079 predicate(UseAVX > 2); 5080 match(Set dst (AddReductionVI src1 src2)); 5081 effect(TEMP tmp, TEMP tmp2); 5082 format %{ "vextracti128_high $tmp,$src2\n\t" 5083 "vpaddd $tmp,$tmp,$src2\n\t" 5084 "pshufd $tmp2,$tmp,0xE\n\t" 5085 "vpaddd $tmp,$tmp,$tmp2\n\t" 5086 "pshufd $tmp2,$tmp,0x1\n\t" 5087 "vpaddd $tmp,$tmp,$tmp2\n\t" 5088 "movd $tmp2,$src1\n\t" 5089 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5090 "movd $dst,$tmp2\t! 
add reduction8I" %} 5091 ins_encode %{ 5092 int vector_len = 0; 5093 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5094 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5095 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5096 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5097 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5098 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5099 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5100 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5101 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5102 %} 5103 ins_pipe( pipe_slow ); 5104 %} 5105 5106 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5107 predicate(UseAVX > 2); 5108 match(Set dst (AddReductionVI src1 src2)); 5109 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5110 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5111 "vpaddd $tmp3,$tmp3,$src2\n\t" 5112 "vextracti128_high $tmp,$tmp3\n\t" 5113 "vpaddd $tmp,$tmp,$tmp3\n\t" 5114 "pshufd $tmp2,$tmp,0xE\n\t" 5115 "vpaddd $tmp,$tmp,$tmp2\n\t" 5116 "pshufd $tmp2,$tmp,0x1\n\t" 5117 "vpaddd $tmp,$tmp,$tmp2\n\t" 5118 "movd $tmp2,$src1\n\t" 5119 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5120 "movd $dst,$tmp2\t! mul reduction16I" %} 5121 ins_encode %{ 5122 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5123 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5124 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5125 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5126 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5127 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5128 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5129 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5130 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5131 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5132 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5133 %} 5134 ins_pipe( pipe_slow ); 5135 %} 5136 5137 #ifdef _LP64 5138 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5139 predicate(UseAVX > 2); 5140 match(Set dst (AddReductionVL src1 src2)); 5141 effect(TEMP tmp, TEMP tmp2); 5142 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5143 "vpaddq $tmp,$src2,$tmp2\n\t" 5144 "movdq $tmp2,$src1\n\t" 5145 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5146 "movdq $dst,$tmp2\t! add reduction2L" %} 5147 ins_encode %{ 5148 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5149 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5150 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5151 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5152 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5153 %} 5154 ins_pipe( pipe_slow ); 5155 %} 5156 5157 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5158 predicate(UseAVX > 2); 5159 match(Set dst (AddReductionVL src1 src2)); 5160 effect(TEMP tmp, TEMP tmp2); 5161 format %{ "vextracti128_high $tmp,$src2\n\t" 5162 "vpaddq $tmp2,$tmp,$src2\n\t" 5163 "pshufd $tmp,$tmp2,0xE\n\t" 5164 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5165 "movdq $tmp,$src1\n\t" 5166 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5167 "movdq $dst,$tmp2\t! 
add reduction4L" %} 5168 ins_encode %{ 5169 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5170 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5171 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5172 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5173 __ movdq($tmp$$XMMRegister, $src1$$Register); 5174 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5175 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5176 %} 5177 ins_pipe( pipe_slow ); 5178 %} 5179 5180 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5181 predicate(UseAVX > 2); 5182 match(Set dst (AddReductionVL src1 src2)); 5183 effect(TEMP tmp, TEMP tmp2); 5184 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5185 "vpaddq $tmp2,$tmp2,$src2\n\t" 5186 "vextracti128_high $tmp,$tmp2\n\t" 5187 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5188 "pshufd $tmp,$tmp2,0xE\n\t" 5189 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5190 "movdq $tmp,$src1\n\t" 5191 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5192 "movdq $dst,$tmp2\t! add reduction8L" %} 5193 ins_encode %{ 5194 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5195 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5196 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5197 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5198 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5199 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5200 __ movdq($tmp$$XMMRegister, $src1$$Register); 5201 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5202 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5203 %} 5204 ins_pipe( pipe_slow ); 5205 %} 5206 #endif 5207 5208 instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5209 predicate(UseSSE >= 1 && UseAVX == 0); 5210 match(Set dst (AddReductionVF dst src2)); 5211 effect(TEMP dst, TEMP tmp); 5212 format %{ "addss $dst,$src2\n\t" 5213 "pshufd $tmp,$src2,0x01\n\t" 5214 "addss $dst,$tmp\t! add reduction2F" %} 5215 ins_encode %{ 5216 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5217 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5218 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5219 %} 5220 ins_pipe( pipe_slow ); 5221 %} 5222 5223 instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5224 predicate(UseAVX > 0); 5225 match(Set dst (AddReductionVF dst src2)); 5226 effect(TEMP dst, TEMP tmp); 5227 format %{ "vaddss $dst,$dst,$src2\n\t" 5228 "pshufd $tmp,$src2,0x01\n\t" 5229 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5230 ins_encode %{ 5231 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5232 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5233 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5234 %} 5235 ins_pipe( pipe_slow ); 5236 %} 5237 5238 instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5239 predicate(UseSSE >= 1 && UseAVX == 0); 5240 match(Set dst (AddReductionVF dst src2)); 5241 effect(TEMP dst, TEMP tmp); 5242 format %{ "addss $dst,$src2\n\t" 5243 "pshufd $tmp,$src2,0x01\n\t" 5244 "addss $dst,$tmp\n\t" 5245 "pshufd $tmp,$src2,0x02\n\t" 5246 "addss $dst,$tmp\n\t" 5247 "pshufd $tmp,$src2,0x03\n\t" 5248 "addss $dst,$tmp\t! 
add reduction4F" %} 5249 ins_encode %{ 5250 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5251 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5252 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5253 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5254 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5255 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5256 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5257 %} 5258 ins_pipe( pipe_slow ); 5259 %} 5260 5261 instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5262 predicate(UseAVX > 0); 5263 match(Set dst (AddReductionVF dst src2)); 5264 effect(TEMP tmp, TEMP dst); 5265 format %{ "vaddss $dst,dst,$src2\n\t" 5266 "pshufd $tmp,$src2,0x01\n\t" 5267 "vaddss $dst,$dst,$tmp\n\t" 5268 "pshufd $tmp,$src2,0x02\n\t" 5269 "vaddss $dst,$dst,$tmp\n\t" 5270 "pshufd $tmp,$src2,0x03\n\t" 5271 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5272 ins_encode %{ 5273 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5274 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5275 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5276 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5277 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5278 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5279 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5280 %} 5281 ins_pipe( pipe_slow ); 5282 %} 5283 5284 instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{ 5285 predicate(UseAVX > 0); 5286 match(Set dst (AddReductionVF dst src2)); 5287 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5288 format %{ "vaddss $dst,$dst,$src2\n\t" 5289 "pshufd $tmp,$src2,0x01\n\t" 5290 "vaddss $dst,$dst,$tmp\n\t" 5291 "pshufd $tmp,$src2,0x02\n\t" 5292 "vaddss $dst,$dst,$tmp\n\t" 5293 "pshufd $tmp,$src2,0x03\n\t" 5294 "vaddss $dst,$dst,$tmp\n\t" 5295 "vextractf128_high $tmp2,$src2\n\t" 5296 "vaddss $dst,$dst,$tmp2\n\t" 5297 "pshufd $tmp,$tmp2,0x01\n\t" 5298 "vaddss $dst,$dst,$tmp\n\t" 5299 "pshufd $tmp,$tmp2,0x02\n\t" 5300 "vaddss $dst,$dst,$tmp\n\t" 5301 "pshufd $tmp,$tmp2,0x03\n\t" 5302 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5303 ins_encode %{ 5304 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5305 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5306 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5307 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5308 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5309 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5310 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5311 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5312 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5313 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5314 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5315 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5316 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5317 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5318 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5319 %} 5320 ins_pipe( pipe_slow ); 5321 %} 5322 5323 instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5324 predicate(UseAVX > 2); 5325 match(Set dst (AddReductionVF dst src2)); 5326 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5327 format %{ "vaddss $dst,$dst,$src2\n\t" 5328 "pshufd $tmp,$src2,0x01\n\t" 5329 "vaddss $dst,$dst,$tmp\n\t" 5330 "pshufd $tmp,$src2,0x02\n\t" 5331 "vaddss $dst,$dst,$tmp\n\t" 5332 "pshufd $tmp,$src2,0x03\n\t" 5333 "vaddss $dst,$dst,$tmp\n\t" 5334 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5335 "vaddss $dst,$dst,$tmp2\n\t" 5336 "pshufd $tmp,$tmp2,0x01\n\t" 5337 "vaddss $dst,$dst,$tmp\n\t" 5338 "pshufd $tmp,$tmp2,0x02\n\t" 5339 "vaddss $dst,$dst,$tmp\n\t" 5340 "pshufd $tmp,$tmp2,0x03\n\t" 5341 "vaddss $dst,$dst,$tmp\n\t" 5342 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5343 "vaddss $dst,$dst,$tmp2\n\t" 5344 "pshufd $tmp,$tmp2,0x01\n\t" 5345 "vaddss $dst,$dst,$tmp\n\t" 5346 "pshufd $tmp,$tmp2,0x02\n\t" 5347 "vaddss $dst,$dst,$tmp\n\t" 5348 "pshufd $tmp,$tmp2,0x03\n\t" 5349 "vaddss $dst,$dst,$tmp\n\t" 5350 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5351 "vaddss $dst,$dst,$tmp2\n\t" 5352 "pshufd $tmp,$tmp2,0x01\n\t" 5353 "vaddss $dst,$dst,$tmp\n\t" 5354 "pshufd $tmp,$tmp2,0x02\n\t" 5355 "vaddss $dst,$dst,$tmp\n\t" 5356 "pshufd $tmp,$tmp2,0x03\n\t" 5357 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5358 ins_encode %{ 5359 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5360 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5361 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5362 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5363 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5364 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5365 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5366 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5367 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5368 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5369 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5370 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5371 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5372 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5373 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5374 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5375 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5376 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5377 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5378 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5379 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5380 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5381 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5382 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5383 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5384 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5385 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5386 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5387 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5388 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5389 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5390 %} 5391 ins_pipe( pipe_slow ); 5392 %} 5393 5394 instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5395 predicate(UseSSE >= 1 && UseAVX == 0); 5396 match(Set dst (AddReductionVD dst src2)); 5397 effect(TEMP tmp, TEMP dst); 5398 format %{ "addsd $dst,$src2\n\t" 5399 "pshufd $tmp,$src2,0xE\n\t" 5400 "addsd $dst,$tmp\t! add reduction2D" %} 5401 ins_encode %{ 5402 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5403 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5404 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5405 %} 5406 ins_pipe( pipe_slow ); 5407 %} 5408 5409 instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5410 predicate(UseAVX > 0); 5411 match(Set dst (AddReductionVD dst src2)); 5412 effect(TEMP tmp, TEMP dst); 5413 format %{ "vaddsd $dst,$dst,$src2\n\t" 5414 "pshufd $tmp,$src2,0xE\n\t" 5415 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5416 ins_encode %{ 5417 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5418 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5419 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5420 %} 5421 ins_pipe( pipe_slow ); 5422 %} 5423 5424 instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{ 5425 predicate(UseAVX > 0); 5426 match(Set dst (AddReductionVD dst src2)); 5427 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5428 format %{ "vaddsd $dst,$dst,$src2\n\t" 5429 "pshufd $tmp,$src2,0xE\n\t" 5430 "vaddsd $dst,$dst,$tmp\n\t" 5431 "vextractf128 $tmp2,$src2,0x1\n\t" 5432 "vaddsd $dst,$dst,$tmp2\n\t" 5433 "pshufd $tmp,$tmp2,0xE\n\t" 5434 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5435 ins_encode %{ 5436 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5437 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5438 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5439 __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5440 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5441 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5442 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5443 %} 5444 ins_pipe( pipe_slow ); 5445 %} 5446 5447 instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5448 predicate(UseAVX > 2); 5449 match(Set dst (AddReductionVD dst src2)); 5450 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5451 format %{ "vaddsd $dst,$dst,$src2\n\t" 5452 "pshufd $tmp,$src2,0xE\n\t" 5453 "vaddsd $dst,$dst,$tmp\n\t" 5454 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5455 "vaddsd $dst,$dst,$tmp2\n\t" 5456 "pshufd $tmp,$tmp2,0xE\n\t" 5457 "vaddsd $dst,$dst,$tmp\n\t" 5458 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5459 "vaddsd $dst,$dst,$tmp2\n\t" 5460 "pshufd $tmp,$tmp2,0xE\n\t" 5461 "vaddsd $dst,$dst,$tmp\n\t" 5462 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5463 "vaddsd $dst,$dst,$tmp2\n\t" 5464 "pshufd $tmp,$tmp2,0xE\n\t" 5465 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5466 ins_encode %{ 5467 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5468 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5469 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5470 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5471 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5472 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5473 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5474 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5475 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5476 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5477 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5478 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5479 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5480 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5481 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5482 %} 5483 ins_pipe( pipe_slow ); 5484 %} 5485 5486 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5487 predicate(UseSSE > 3 && UseAVX == 0); 5488 match(Set dst (MulReductionVI src1 src2)); 5489 effect(TEMP tmp, TEMP tmp2); 5490 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5491 "pmulld $tmp2,$src2\n\t" 5492 "movd $tmp,$src1\n\t" 5493 "pmulld $tmp2,$tmp\n\t" 5494 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5495 ins_encode %{ 5496 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5497 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5498 __ movdl($tmp$$XMMRegister, $src1$$Register); 5499 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5500 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5501 %} 5502 ins_pipe( pipe_slow ); 5503 %} 5504 5505 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5506 predicate(UseAVX > 0); 5507 match(Set dst (MulReductionVI src1 src2)); 5508 effect(TEMP tmp, TEMP tmp2); 5509 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5510 "vpmulld $tmp,$src2,$tmp2\n\t" 5511 "movd $tmp2,$src1\n\t" 5512 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5513 "movd $dst,$tmp2\t! mul reduction2I" %} 5514 ins_encode %{ 5515 int vector_len = 0; 5516 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5517 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5518 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5519 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5520 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5521 %} 5522 ins_pipe( pipe_slow ); 5523 %} 5524 5525 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5526 predicate(UseSSE > 3 && UseAVX == 0); 5527 match(Set dst (MulReductionVI src1 src2)); 5528 effect(TEMP tmp, TEMP tmp2); 5529 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5530 "pmulld $tmp2,$src2\n\t" 5531 "pshufd $tmp,$tmp2,0x1\n\t" 5532 "pmulld $tmp2,$tmp\n\t" 5533 "movd $tmp,$src1\n\t" 5534 "pmulld $tmp2,$tmp\n\t" 5535 "movd $dst,$tmp2\t! mul reduction4I" %} 5536 ins_encode %{ 5537 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5538 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5539 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5540 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5541 __ movdl($tmp$$XMMRegister, $src1$$Register); 5542 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5543 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5544 %} 5545 ins_pipe( pipe_slow ); 5546 %} 5547 5548 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5549 predicate(UseAVX > 0); 5550 match(Set dst (MulReductionVI src1 src2)); 5551 effect(TEMP tmp, TEMP tmp2); 5552 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5553 "vpmulld $tmp,$src2,$tmp2\n\t" 5554 "pshufd $tmp2,$tmp,0x1\n\t" 5555 "vpmulld $tmp,$tmp,$tmp2\n\t" 5556 "movd $tmp2,$src1\n\t" 5557 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5558 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5559 ins_encode %{ 5560 int vector_len = 0; 5561 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5562 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5563 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5564 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5565 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5566 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5567 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5568 %} 5569 ins_pipe( pipe_slow ); 5570 %} 5571 5572 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5573 predicate(UseAVX > 1); 5574 match(Set dst (MulReductionVI src1 src2)); 5575 effect(TEMP tmp, TEMP tmp2); 5576 format %{ "vextracti128_high $tmp,$src2\n\t" 5577 "vpmulld $tmp,$tmp,$src2\n\t" 5578 "pshufd $tmp2,$tmp,0xE\n\t" 5579 "vpmulld $tmp,$tmp,$tmp2\n\t" 5580 "pshufd $tmp2,$tmp,0x1\n\t" 5581 "vpmulld $tmp,$tmp,$tmp2\n\t" 5582 "movd $tmp2,$src1\n\t" 5583 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5584 "movd $dst,$tmp2\t! mul reduction8I" %} 5585 ins_encode %{ 5586 int vector_len = 0; 5587 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5588 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5589 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5590 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5591 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5592 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5593 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5594 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5595 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5596 %} 5597 ins_pipe( pipe_slow ); 5598 %} 5599 5600 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5601 predicate(UseAVX > 2); 5602 match(Set dst (MulReductionVI src1 src2)); 5603 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5604 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5605 "vpmulld $tmp3,$tmp3,$src2\n\t" 5606 "vextracti128_high $tmp,$tmp3\n\t" 5607 "vpmulld $tmp,$tmp,$src2\n\t" 5608 "pshufd $tmp2,$tmp,0xE\n\t" 5609 "vpmulld $tmp,$tmp,$tmp2\n\t" 5610 "pshufd $tmp2,$tmp,0x1\n\t" 5611 "vpmulld $tmp,$tmp,$tmp2\n\t" 5612 "movd $tmp2,$src1\n\t" 5613 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5614 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5615 ins_encode %{ 5616 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5617 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5618 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5619 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5620 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5621 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5622 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5623 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5624 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5625 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5626 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5627 %} 5628 ins_pipe( pipe_slow ); 5629 %} 5630 5631 #ifdef _LP64 5632 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5633 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5634 match(Set dst (MulReductionVL src1 src2)); 5635 effect(TEMP tmp, TEMP tmp2); 5636 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5637 "vpmullq $tmp,$src2,$tmp2\n\t" 5638 "movdq $tmp2,$src1\n\t" 5639 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5640 "movdq $dst,$tmp2\t! mul reduction2L" %} 5641 ins_encode %{ 5642 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5643 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5644 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5645 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5646 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5647 %} 5648 ins_pipe( pipe_slow ); 5649 %} 5650 5651 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5652 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5653 match(Set dst (MulReductionVL src1 src2)); 5654 effect(TEMP tmp, TEMP tmp2); 5655 format %{ "vextracti128_high $tmp,$src2\n\t" 5656 "vpmullq $tmp2,$tmp,$src2\n\t" 5657 "pshufd $tmp,$tmp2,0xE\n\t" 5658 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5659 "movdq $tmp,$src1\n\t" 5660 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5661 "movdq $dst,$tmp2\t! mul reduction4L" %} 5662 ins_encode %{ 5663 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5664 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5665 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5666 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5667 __ movdq($tmp$$XMMRegister, $src1$$Register); 5668 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5669 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5670 %} 5671 ins_pipe( pipe_slow ); 5672 %} 5673 5674 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5675 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5676 match(Set dst (MulReductionVL src1 src2)); 5677 effect(TEMP tmp, TEMP tmp2); 5678 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5679 "vpmullq $tmp2,$tmp2,$src2\n\t" 5680 "vextracti128_high $tmp,$tmp2\n\t" 5681 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5682 "pshufd $tmp,$tmp2,0xE\n\t" 5683 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5684 "movdq $tmp,$src1\n\t" 5685 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5686 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5687 ins_encode %{ 5688 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5689 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5690 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5691 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5692 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5693 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5694 __ movdq($tmp$$XMMRegister, $src1$$Register); 5695 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5696 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5697 %} 5698 ins_pipe( pipe_slow ); 5699 %} 5700 #endif 5701 5702 instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{ 5703 predicate(UseSSE >= 1 && UseAVX == 0); 5704 match(Set dst (MulReductionVF dst src2)); 5705 effect(TEMP dst, TEMP tmp); 5706 format %{ "mulss $dst,$src2\n\t" 5707 "pshufd $tmp,$src2,0x01\n\t" 5708 "mulss $dst,$tmp\t! mul reduction2F" %} 5709 ins_encode %{ 5710 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5711 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5712 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5713 %} 5714 ins_pipe( pipe_slow ); 5715 %} 5716 5717 instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5718 predicate(UseAVX > 0); 5719 match(Set dst (MulReductionVF dst src2)); 5720 effect(TEMP tmp, TEMP dst); 5721 format %{ "vmulss $dst,$dst,$src2\n\t" 5722 "pshufd $tmp,$src2,0x01\n\t" 5723 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5724 ins_encode %{ 5725 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5726 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5727 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5728 %} 5729 ins_pipe( pipe_slow ); 5730 %} 5731 5732 instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5733 predicate(UseSSE >= 1 && UseAVX == 0); 5734 match(Set dst (MulReductionVF dst src2)); 5735 effect(TEMP dst, TEMP tmp); 5736 format %{ "mulss $dst,$src2\n\t" 5737 "pshufd $tmp,$src2,0x01\n\t" 5738 "mulss $dst,$tmp\n\t" 5739 "pshufd $tmp,$src2,0x02\n\t" 5740 "mulss $dst,$tmp\n\t" 5741 "pshufd $tmp,$src2,0x03\n\t" 5742 "mulss $dst,$tmp\t! mul reduction4F" %} 5743 ins_encode %{ 5744 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5745 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5746 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5747 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5748 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5749 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5750 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5751 %} 5752 ins_pipe( pipe_slow ); 5753 %} 5754 5755 instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5756 predicate(UseAVX > 0); 5757 match(Set dst (MulReductionVF dst src2)); 5758 effect(TEMP tmp, TEMP dst); 5759 format %{ "vmulss $dst,$dst,$src2\n\t" 5760 "pshufd $tmp,$src2,0x01\n\t" 5761 "vmulss $dst,$dst,$tmp\n\t" 5762 "pshufd $tmp,$src2,0x02\n\t" 5763 "vmulss $dst,$dst,$tmp\n\t" 5764 "pshufd $tmp,$src2,0x03\n\t" 5765 "vmulss $dst,$dst,$tmp\t! 
instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

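// pshufd's immediate selects source dwords two bits at a time (bits 1:0
// choose the dword written to lane 0, bits 3:2 lane 1, and so on). The
// reductions above and below only consume lane 0 of the shuffled temp, so
// 0x01/0x02/0x03 bring dword 1/2/3 down to lane 0, and 0xE (0b1110) brings
// the upper qword down. Lane selection, as a sketch (illustrative only):
//
//   dst[0] = src[(imm >> 0) & 3];
//   dst[1] = src[(imm >> 2) & 3];
//   dst[2] = src[(imm >> 4) & 3];
//   dst[3] = src[(imm >> 6) & 3];
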
instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

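// Throughout this section, vector_len selects the encoded operand width,
// matching the operand class of the rule (believed to correspond to the
// assembler's AVX_128bit/AVX_256bit/AVX_512bit encoding constants):
//
//   int vector_len = 0;  // 128-bit operation (vecS/vecD/vecX operands)
//   int vector_len = 1;  // 256-bit operation (vecY operands)
//   int vector_len = 2;  // 512-bit operation (vecZ operands)
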
// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

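// As with the packed-byte adds above, each vector size comes in up to three
// flavors: a two-operand SSE form whose dst is also an input
// (match(Set dst (AddVB dst src))), a three-operand AVX register form, and
// a _mem form that folds the matched LoadVector into the instruction's
// memory operand. In assembly terms (illustrative only):
//
//   paddb  xmm0, xmm1         // SSE:  xmm0 += xmm1, dst doubles as input
//   vpaddb xmm0, xmm1, xmm2   // AVX:  xmm0 = xmm1 + xmm2, non-destructive
//   vpaddb xmm0, xmm1, [mem]  // AVX:  load folded into the add
//
// Note that the 512-bit byte and short forms also require
// VM_Version::supports_avx512bw(), since AVX-512F alone only provides
// dword/qword element operations.
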
// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

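// Unlike the 64B/32S forms above, the 512-bit dword adds (and the qword adds
// below) are gated on UseAVX > 2 alone: vpaddd/vpaddq on 512-bit vectors are
// part of the AVX-512 foundation subset, so no supports_avx512bw() check is
// needed here.
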
// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

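// The subtraction rules below mirror the addition rules above one for one:
// same operand classes, same predicates, with psub*/vpsub* substituted for
// padd*/vpadd*. Elementwise, for the packed-byte case (illustrative only;
// the arithmetic wraps mod 256, matching Java's narrowing semantics):
//
//   for (int i = 0; i < n; i++)
//     dst[i] = (signed char)(src1[i] - src2[i]);
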
// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

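// There is no packed-byte multiply rule: x86 SIMD provides no pmullb, so the
// multiply section starts at shorts/chars. pmullw and vpmullw keep only the
// low 16 bits of each 16x16-bit product, which matches Java's wrapping
// short/char multiplication (illustrative only):
//
//   for (int i = 0; i < n; i++)
//     dst[i] = (short)(src1[i] * src2[i]);   // low 16 bits of the product
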
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
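// The MulVI rules above lower an int-vector multiply to pmulld/vpmulld, which
// keeps the low 32 bits of each lane product; pmulld first appeared in SSE4.1,
// which is what the UseSSE > 3 predicate on the non-AVX forms checks for. A
// minimal C++ sketch of the per-lane semantics (illustrative only, not HotSpot
// code; the helper name pmulld_lane is hypothetical):
//
//   #include <cstdint>
//   static int32_t pmulld_lane(int32_t a, int32_t b) {
//     // Unsigned and signed multiplies agree on the low 32 bits of the product.
//     return (int32_t)((uint32_t)a * (uint32_t)b);
//   }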
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
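// The MulVL rules are all gated on VM_Version::supports_avx512dq() because x86
// has no single packed 64x64->64-bit multiply instruction before AVX-512DQ
// introduced vpmullq, so these rules can only match on hardware with that
// feature. Per-lane effect as a hedged C++ sketch (illustrative only; the name
// vpmullq_lane is hypothetical):
//
//   #include <cstdint>
//   static int64_t vpmullq_lane(int64_t a, int64_t b) {
//     return (int64_t)((uint64_t)a * (uint64_t)b);  // low 64 bits of the product
//   }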
instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
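// The two CMoveV rules above implement vector conditional move as a
// compare-then-blend pair: cmpps/cmppd writes an all-ones or all-zeros mask
// into each lane of $dst, and blendvps/blendvpd then selects $src2 where the
// mask is set and $src1 where it is clear. A minimal C++ sketch of one float
// lane (illustrative only, not HotSpot code; cmove_lane is a hypothetical
// helper name):
//
//   #include <cstdint>
//   #include <cstring>
//   static float cmove_lane(bool pred, float a, float b) {  // a = src1, b = src2
//     uint32_t ua, ub, r, mask = pred ? 0xFFFFFFFFu : 0u;   // cmpps lane result
//     std::memcpy(&ua, &a, 4);
//     std::memcpy(&ub, &b, 4);
//     r = (ub & mask) | (ua & ~mask);                       // blendv selection
//     float out;
//     std::memcpy(&out, &r, 4);
//     return out;
//   }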
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
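// vshiftcnt above serves both directions: movdl places the scalar count in the
// low dword of an XMM register, and the packed shifts (psllw/pslld/psllq,
// psrlw/psrld/psrlq, psraw/psrad) read the count from that low part and apply
// it to every lane. One 16-bit lane of psllw as a hedged C++ sketch
// (illustrative only; psllw_lane is a hypothetical name):
//
//   #include <cstdint>
//   static uint16_t psllw_lane(uint16_t x, uint64_t cnt) {
//     // A count of 16 or more zeroes the lane; it is not taken modulo 16.
//     return cnt >= 16 ? 0 : (uint16_t)(x << cnt);
//   }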
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts the short value into an int with
// sign extension before the shift. Char vectors are fine, since chars are
// unsigned values.
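// Concretely, the two computations disagree for any negative short, which is
// why these rules are only safe when the data is known to be unsigned chars,
// as the comment above notes. A C++ sketch of the mismatch (illustrative
// only; helper names are hypothetical):
//
//   #include <cstdint>
//   // What Java computes for s >>> k: shift the sign-extended 32-bit value.
//   static int32_t java_short_urshift(int16_t s, int k) {
//     return (int32_t)((uint32_t)(int32_t)s >> k);  // s = -4, k = 1: 0x7FFFFFFE
//   }
//   // What psrlw computes: shift the raw 16-bit lane.
//   static uint16_t psrlw_lane(int16_t s, int k) {
//     return (uint16_t)((uint16_t)s >> k);          // s = -4, k = 1: 0x7FFE
//   }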
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
logical right shift packed2L" %} 8891 ins_encode %{ 8892 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 8893 %} 8894 ins_pipe( pipe_slow ); 8895 %} 8896 8897 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 8898 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8899 match(Set dst (URShiftVL dst shift)); 8900 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 8901 ins_encode %{ 8902 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 8903 %} 8904 ins_pipe( pipe_slow ); 8905 %} 8906 8907 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 8908 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8909 match(Set dst (URShiftVL src shift)); 8910 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 8911 ins_encode %{ 8912 int vector_len = 0; 8913 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8914 %} 8915 ins_pipe( pipe_slow ); 8916 %} 8917 8918 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8919 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8920 match(Set dst (URShiftVL src shift)); 8921 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 8922 ins_encode %{ 8923 int vector_len = 0; 8924 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8925 %} 8926 ins_pipe( pipe_slow ); 8927 %} 8928 8929 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 8930 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8931 match(Set dst (URShiftVL src shift)); 8932 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 8933 ins_encode %{ 8934 int vector_len = 1; 8935 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8936 %} 8937 ins_pipe( pipe_slow ); 8938 %} 8939 8940 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8941 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8942 match(Set dst (URShiftVL src shift)); 8943 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 8944 ins_encode %{ 8945 int vector_len = 1; 8946 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8947 %} 8948 ins_pipe( pipe_slow ); 8949 %} 8950 8951 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 8952 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8953 match(Set dst (URShiftVL src shift)); 8954 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8955 ins_encode %{ 8956 int vector_len = 2; 8957 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8958 %} 8959 ins_pipe( pipe_slow ); 8960 %} 8961 8962 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8963 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8964 match(Set dst (URShiftVL src shift)); 8965 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8966 ins_encode %{ 8967 int vector_len = 2; 8968 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8969 %} 8970 ins_pipe( pipe_slow ); 8971 %} 8972 8973 // ------------------- ArithmeticRightShift ----------------------------------- 8974 8975 // Shorts/Chars vector arithmetic right shift 8976 instruct vsra2S(vecS dst, vecS shift) %{ 8977 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8978 match(Set dst (RShiftVS dst shift)); 8979 format %{ "psraw $dst,$shift\t! 
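// Illustration (not a matcher rule): the URShiftV* rules above are usually
// reached when C2's SuperWord pass vectorizes a counted loop whose body is an
// unsigned right shift by a loop-invariant amount. A minimal Java sketch,
// with hypothetical names:
//
//   static void shiftRightLogical(int[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = a[i] >>> s;   // URShiftVI -> psrld (SSE) / vpsrld (AVX)
//     }
//   }
//
// If the shift count is a compile-time constant, the *_imm rules (immI8
// operand) can be selected instead; long[] loops map to URShiftVL and
// psrlq/vpsrlq in the same way.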
// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
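// Illustration (not a matcher rule): RShiftVS/RShiftVI come from signed right
// shifts, which replicate the sign bit instead of zero-filling as the
// URShiftV* rules above do. A minimal Java sketch, with hypothetical names:
//
//   static void shiftRightArithmetic(int[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = a[i] >> s;   // RShiftVI -> psrad (SSE) / vpsrad (AVX)
//     }
//   }
//
// As the comment above notes, there is no packed arithmetic right shift for
// longs here, so long[] loops of this shape are not covered by these rules.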
// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------
instruct vor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
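// Illustration (not a matcher rule): AndV/OrV/XorV are produced when SuperWord
// vectorizes element-wise bitwise loops; the *_mem rules fold the LoadVector of
// the second operand into the instruction. A minimal Java sketch, with
// hypothetical names:
//
//   static void bitwiseOps(int[] a, int[] b, int[] and, int[] or, int[] xor) {
//     for (int i = 0; i < a.length; i++) {
//       and[i] = a[i] & b[i];   // AndV -> pand/vpand
//       or[i]  = a[i] | b[i];   // OrV  -> por/vpor
//       xor[i] = a[i] ^ b[i];   // XorV -> pxor/vpxor
//     }
//   }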
// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
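// Illustration (not a matcher rule): FmaVF/FmaVD nodes are created for
// Math.fma() calls in vectorizable loops when UseFMA is enabled; matching the
// "c = a * b + c" shape with c as both input and result yields a
// single-rounding packed FMA. A minimal Java sketch, with hypothetical names:
//
//   static void fusedMultiplyAdd(double[] a, double[] b, double[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = Math.fma(a[i], b[i], c[i]);   // FmaVD -> vfmad
//     }
//   }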
// --------------------------------- Vector Multiply Add --------------------------------------

instruct smuladd4S2I_reg(vecD dst, vecD src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct smuladd8S4I_reg(vecX dst, vecX src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
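// Illustration (not a matcher rule): pmaddwd multiplies adjacent pairs of
// 16-bit elements and sums each pair into one 32-bit lane, so MulAddVS2VI is
// typically produced from a pairwise multiply-add over shorts. A minimal Java
// sketch, with hypothetical names:
//
//   static void mulAdd(short[] a, short[] b, int[] out) {
//     for (int i = 0; i < out.length; i++) {
//       out[i] = a[2 * i] * b[2 * i] + a[2 * i + 1] * b[2 * i + 1];
//     }
//   }
//
// With an accumulating form (out[i] += ...), the AddVI-of-MulAddVS2VI pattern
// in the next section can be matched instead on VNNI-capable hardware.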
// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
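// Illustration (not a matcher rule): PopCountVI comes from Integer.bitCount()
// in vectorizable loops; with UsePopCountInstruction set and AVX-512 VPOPCNTDQ
// available, each vector maps to a single vpopcntd. A minimal Java sketch,
// with hypothetical names:
//
//   static void popCounts(int[] a, int[] bits) {
//     for (int i = 0; i < a.length; i++) {
//       bits[i] = Integer.bitCount(a[i]);   // PopCountVI -> vpopcntd
//     }
//   }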