//
// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
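// For illustration, reading one entry of the table below:
//
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
//
// declares the second 32-bit word of xmm0 as Save-On-Call for both the
// allocator and the C calling convention, with ideal type Op_RegF (so spills
// use float loads/stores), opcode encoding 0, and a VMReg handle one 32-bit
// slot past xmm0's base slot. A Double allocated to xmm0 occupies the
// XMM0/XMM0b pair; a full 512-bit vector spans XMM0 through XMM0p.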
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64
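// The condition-code register follows. It is given the dummy VMReg
// VMRegImpl::Bad() because (as a reading of the definition, not a statement
// from this file) the flags register has no stack-slot image of its own and
// is not spilled the way the XMM words above are.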
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
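// An alloc_class lists registers in the order the allocator considers them,
// so the chunk layout above is part of the allocation policy.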
// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
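// The reg_class_dynamic entries above select between the evex and legacy
// classes at runtime using the %{ ... %} predicate. Roughly (a sketch of the
// selection logic, with hypothetical mask names, not the literal ADLC output):
//
//   const RegMask& float_reg_mask() {
//     return VM_Version::supports_evex() ? float_reg_evex_mask
//                                        : float_reg_legacy_mask;
//   }
//
// so XMM16-XMM31 become allocatable only on EVEX-capable CPUs, and the *_vl
// variants additionally require AVX512VL.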
// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1102 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1103 1104 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1105 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1106 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1107 1108 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1109 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1110 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1111 1112 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1113 reg_class ymm3_reg(XMM3, XMM3b, 
XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1114 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1115 1116 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1117 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1118 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1119 1120 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1121 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1122 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1123 1124 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1125 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1126 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1127 1128 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1129 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1130 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1131 1132 #ifdef _LP64 1133 1134 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1135 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1136 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1137 1138 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1139 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1140 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1141 1142 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1143 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1144 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1145 1146 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1147 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1148 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1149 1150 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1151 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1152 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1153 1154 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1155 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1156 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1157 1158 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1159 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1160 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1161 1162 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1163 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1164 reg_class zmm15_reg(XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1165 1166 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1167 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1168 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1169 1170 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1171 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1172 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1173 1174 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1175 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1176 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1177 1178 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1179 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1180 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1181 1182 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1183 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1184 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1185 1186 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1187 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1188 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1189 1190 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1191 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1192 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1193 1194 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1195 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1196 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1197 1198 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1199 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1200 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1201 1202 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1203 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1204 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1205 1206 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1207 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1208 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1209 1210 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1211 reg_class ymm27_reg(XMM27, 
                    XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // Number of relocations needed by a call trampoline stub.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5-byte instructions plus one move for the unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // Push "the_pc" on the stack without destroying any registers,
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
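// They are constant sign-bit masks consumed by the scalar abs/neg
// instructions defined later in this file. For illustration: absF_reg clears
// the sign bit with "andps dst, [float_signmask()]" (element mask 0x7fffffff),
// and negF_reg flips it with "xorps dst, [float_signflip()]" (element mask
// 0x80000000); the double variants use the corresponding 64-bit masks.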
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only with EVEX: vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
    case Op_MulAddVS2VI:
      if (UseSSE < 2)
        ret_value = false;
      break;
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
  }

  return ret_value; // By default match rules are supported.
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // Identify extra cases that we might want to provide match rules for,
  // e.g. vector nodes (Op_*) and other intrinsics, while guarding with vlen.
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value; // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // The minimum size which can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch (size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only the lowest bits of the xmm reg are used for the vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector loads and stores.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs.
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions.
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained by emitting the instructions into a scratch buffer.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
#ifndef _LP64
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecY:
#ifndef _LP64
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as the SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so the size is
  // obtained by emitting the instructions into a scratch buffer.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  // For example, replicate4_imm(0xABCD, 2) yields 0xABCDABCD.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1; // mask off sign bits
  while (bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  // For example, replicate8_imm(0x12, 1) yields 0x1212121212121212.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
  while (bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
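//
// For example, the vecZ operand below can stand in for any ideal VecZ
// (512-bit vector) value in a match rule; its ALLOC_IN_RC(vectorz_reg)
// constraint restricts allocation to the dynamic vectorz_reg class defined
// above, which resolves to either the EVEX or the legacy register set
// depending on VM_Version::supports_evex().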

operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_vl));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    // TODO: the adlc fails to compile without the next two lines, reporting:
    //   x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    //   equal' for overflow.
    overflow     (0x20, "o");  // not really supported by the instruction
    no_overflow  (0x21, "no"); // not really supported by the instruction
  %}
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "ud2\t# ShouldNotReachHere" %}
  ins_encode %{
    __ ud2();
  %}
  ins_pipe(pipe_slow);
%}

// =================================EVEX special===============================

instruct setMask(rRegI dst, rRegI src) %{
  predicate(Matcher::has_predicated_vectors());
  match(Set dst (SetVectMaskI src));
  effect(TEMP dst);
  format %{ "setvectmask $dst, $src" %}
  ins_encode %{
    __ setvectmask($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __
vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2129 %} 2130 ins_pipe(pipe_slow); 2131 %} 2132 2133 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2134 predicate(UseAVX > 0); 2135 match(Set dst (AddF src con)); 2136 2137 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2138 ins_cost(150); 2139 ins_encode %{ 2140 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2141 %} 2142 ins_pipe(pipe_slow); 2143 %} 2144 2145 instruct addD_reg(regD dst, regD src) %{ 2146 predicate((UseSSE>=2) && (UseAVX == 0)); 2147 match(Set dst (AddD dst src)); 2148 2149 format %{ "addsd $dst, $src" %} 2150 ins_cost(150); 2151 ins_encode %{ 2152 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2153 %} 2154 ins_pipe(pipe_slow); 2155 %} 2156 2157 instruct addD_mem(regD dst, memory src) %{ 2158 predicate((UseSSE>=2) && (UseAVX == 0)); 2159 match(Set dst (AddD dst (LoadD src))); 2160 2161 format %{ "addsd $dst, $src" %} 2162 ins_cost(150); 2163 ins_encode %{ 2164 __ addsd($dst$$XMMRegister, $src$$Address); 2165 %} 2166 ins_pipe(pipe_slow); 2167 %} 2168 2169 instruct addD_imm(regD dst, immD con) %{ 2170 predicate((UseSSE>=2) && (UseAVX == 0)); 2171 match(Set dst (AddD dst con)); 2172 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2173 ins_cost(150); 2174 ins_encode %{ 2175 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2176 %} 2177 ins_pipe(pipe_slow); 2178 %} 2179 2180 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2181 predicate(UseAVX > 0); 2182 match(Set dst (AddD src1 src2)); 2183 2184 format %{ "vaddsd $dst, $src1, $src2" %} 2185 ins_cost(150); 2186 ins_encode %{ 2187 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2188 %} 2189 ins_pipe(pipe_slow); 2190 %} 2191 2192 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2193 predicate(UseAVX > 0); 2194 match(Set dst (AddD src1 (LoadD src2))); 2195 2196 format %{ "vaddsd $dst, $src1, $src2" %} 2197 ins_cost(150); 2198 ins_encode %{ 2199 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2200 %} 2201 ins_pipe(pipe_slow); 2202 %} 2203 2204 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2205 predicate(UseAVX > 0); 2206 match(Set dst (AddD src con)); 2207 2208 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2209 ins_cost(150); 2210 ins_encode %{ 2211 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2212 %} 2213 ins_pipe(pipe_slow); 2214 %} 2215 2216 instruct subF_reg(regF dst, regF src) %{ 2217 predicate((UseSSE>=1) && (UseAVX == 0)); 2218 match(Set dst (SubF dst src)); 2219 2220 format %{ "subss $dst, $src" %} 2221 ins_cost(150); 2222 ins_encode %{ 2223 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2224 %} 2225 ins_pipe(pipe_slow); 2226 %} 2227 2228 instruct subF_mem(regF dst, memory src) %{ 2229 predicate((UseSSE>=1) && (UseAVX == 0)); 2230 match(Set dst (SubF dst (LoadF src))); 2231 2232 format %{ "subss $dst, $src" %} 2233 ins_cost(150); 2234 ins_encode %{ 2235 __ subss($dst$$XMMRegister, $src$$Address); 2236 %} 2237 ins_pipe(pipe_slow); 2238 %} 2239 2240 instruct subF_imm(regF dst, immF con) %{ 2241 predicate((UseSSE>=1) && (UseAVX == 0)); 2242 match(Set dst (SubF dst con)); 2243 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2244 ins_cost(150); 2245 ins_encode %{ 2246 __ subss($dst$$XMMRegister, $constantaddress($con)); 2247 %} 2248 ins_pipe(pipe_slow); 2249 %} 2250 
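
// Note the two-operand/three-operand split used throughout this block: the
// SSE forms above (predicated on UseSSE with UseAVX == 0) are destructive,
// e.g. "subss dst, src" computes dst -= src, while the AVX forms below use
// the non-destructive three-operand VEX encoding, e.g.
// "vsubss dst, src1, src2" computes dst = src1 - src2 and leaves both
// sources untouched, giving the register allocator more freedom.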
2251 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2252 predicate(UseAVX > 0); 2253 match(Set dst (SubF src1 src2)); 2254 2255 format %{ "vsubss $dst, $src1, $src2" %} 2256 ins_cost(150); 2257 ins_encode %{ 2258 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2259 %} 2260 ins_pipe(pipe_slow); 2261 %} 2262 2263 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2264 predicate(UseAVX > 0); 2265 match(Set dst (SubF src1 (LoadF src2))); 2266 2267 format %{ "vsubss $dst, $src1, $src2" %} 2268 ins_cost(150); 2269 ins_encode %{ 2270 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2271 %} 2272 ins_pipe(pipe_slow); 2273 %} 2274 2275 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2276 predicate(UseAVX > 0); 2277 match(Set dst (SubF src con)); 2278 2279 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2280 ins_cost(150); 2281 ins_encode %{ 2282 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2283 %} 2284 ins_pipe(pipe_slow); 2285 %} 2286 2287 instruct subD_reg(regD dst, regD src) %{ 2288 predicate((UseSSE>=2) && (UseAVX == 0)); 2289 match(Set dst (SubD dst src)); 2290 2291 format %{ "subsd $dst, $src" %} 2292 ins_cost(150); 2293 ins_encode %{ 2294 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2295 %} 2296 ins_pipe(pipe_slow); 2297 %} 2298 2299 instruct subD_mem(regD dst, memory src) %{ 2300 predicate((UseSSE>=2) && (UseAVX == 0)); 2301 match(Set dst (SubD dst (LoadD src))); 2302 2303 format %{ "subsd $dst, $src" %} 2304 ins_cost(150); 2305 ins_encode %{ 2306 __ subsd($dst$$XMMRegister, $src$$Address); 2307 %} 2308 ins_pipe(pipe_slow); 2309 %} 2310 2311 instruct subD_imm(regD dst, immD con) %{ 2312 predicate((UseSSE>=2) && (UseAVX == 0)); 2313 match(Set dst (SubD dst con)); 2314 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2315 ins_cost(150); 2316 ins_encode %{ 2317 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2318 %} 2319 ins_pipe(pipe_slow); 2320 %} 2321 2322 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2323 predicate(UseAVX > 0); 2324 match(Set dst (SubD src1 src2)); 2325 2326 format %{ "vsubsd $dst, $src1, $src2" %} 2327 ins_cost(150); 2328 ins_encode %{ 2329 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2330 %} 2331 ins_pipe(pipe_slow); 2332 %} 2333 2334 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2335 predicate(UseAVX > 0); 2336 match(Set dst (SubD src1 (LoadD src2))); 2337 2338 format %{ "vsubsd $dst, $src1, $src2" %} 2339 ins_cost(150); 2340 ins_encode %{ 2341 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2342 %} 2343 ins_pipe(pipe_slow); 2344 %} 2345 2346 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2347 predicate(UseAVX > 0); 2348 match(Set dst (SubD src con)); 2349 2350 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2351 ins_cost(150); 2352 ins_encode %{ 2353 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2354 %} 2355 ins_pipe(pipe_slow); 2356 %} 2357 2358 instruct mulF_reg(regF dst, regF src) %{ 2359 predicate((UseSSE>=1) && (UseAVX == 0)); 2360 match(Set dst (MulF dst src)); 2361 2362 format %{ "mulss $dst, $src" %} 2363 ins_cost(150); 2364 ins_encode %{ 2365 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2366 %} 2367 ins_pipe(pipe_slow); 2368 %} 2369 2370 instruct mulF_mem(regF dst, memory src) %{ 2371 predicate((UseSSE>=1) && (UseAVX == 0)); 2372 
match(Set dst (MulF dst (LoadF src))); 2373 2374 format %{ "mulss $dst, $src" %} 2375 ins_cost(150); 2376 ins_encode %{ 2377 __ mulss($dst$$XMMRegister, $src$$Address); 2378 %} 2379 ins_pipe(pipe_slow); 2380 %} 2381 2382 instruct mulF_imm(regF dst, immF con) %{ 2383 predicate((UseSSE>=1) && (UseAVX == 0)); 2384 match(Set dst (MulF dst con)); 2385 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2386 ins_cost(150); 2387 ins_encode %{ 2388 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2389 %} 2390 ins_pipe(pipe_slow); 2391 %} 2392 2393 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2394 predicate(UseAVX > 0); 2395 match(Set dst (MulF src1 src2)); 2396 2397 format %{ "vmulss $dst, $src1, $src2" %} 2398 ins_cost(150); 2399 ins_encode %{ 2400 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2401 %} 2402 ins_pipe(pipe_slow); 2403 %} 2404 2405 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2406 predicate(UseAVX > 0); 2407 match(Set dst (MulF src1 (LoadF src2))); 2408 2409 format %{ "vmulss $dst, $src1, $src2" %} 2410 ins_cost(150); 2411 ins_encode %{ 2412 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2413 %} 2414 ins_pipe(pipe_slow); 2415 %} 2416 2417 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2418 predicate(UseAVX > 0); 2419 match(Set dst (MulF src con)); 2420 2421 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2422 ins_cost(150); 2423 ins_encode %{ 2424 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2425 %} 2426 ins_pipe(pipe_slow); 2427 %} 2428 2429 instruct mulD_reg(regD dst, regD src) %{ 2430 predicate((UseSSE>=2) && (UseAVX == 0)); 2431 match(Set dst (MulD dst src)); 2432 2433 format %{ "mulsd $dst, $src" %} 2434 ins_cost(150); 2435 ins_encode %{ 2436 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2437 %} 2438 ins_pipe(pipe_slow); 2439 %} 2440 2441 instruct mulD_mem(regD dst, memory src) %{ 2442 predicate((UseSSE>=2) && (UseAVX == 0)); 2443 match(Set dst (MulD dst (LoadD src))); 2444 2445 format %{ "mulsd $dst, $src" %} 2446 ins_cost(150); 2447 ins_encode %{ 2448 __ mulsd($dst$$XMMRegister, $src$$Address); 2449 %} 2450 ins_pipe(pipe_slow); 2451 %} 2452 2453 instruct mulD_imm(regD dst, immD con) %{ 2454 predicate((UseSSE>=2) && (UseAVX == 0)); 2455 match(Set dst (MulD dst con)); 2456 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2457 ins_cost(150); 2458 ins_encode %{ 2459 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2460 %} 2461 ins_pipe(pipe_slow); 2462 %} 2463 2464 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2465 predicate(UseAVX > 0); 2466 match(Set dst (MulD src1 src2)); 2467 2468 format %{ "vmulsd $dst, $src1, $src2" %} 2469 ins_cost(150); 2470 ins_encode %{ 2471 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2472 %} 2473 ins_pipe(pipe_slow); 2474 %} 2475 2476 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2477 predicate(UseAVX > 0); 2478 match(Set dst (MulD src1 (LoadD src2))); 2479 2480 format %{ "vmulsd $dst, $src1, $src2" %} 2481 ins_cost(150); 2482 ins_encode %{ 2483 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2484 %} 2485 ins_pipe(pipe_slow); 2486 %} 2487 2488 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2489 predicate(UseAVX > 0); 2490 match(Set dst (MulD src con)); 2491 2492 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" 
%} 2493 ins_cost(150); 2494 ins_encode %{ 2495 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2496 %} 2497 ins_pipe(pipe_slow); 2498 %} 2499 2500 instruct divF_reg(regF dst, regF src) %{ 2501 predicate((UseSSE>=1) && (UseAVX == 0)); 2502 match(Set dst (DivF dst src)); 2503 2504 format %{ "divss $dst, $src" %} 2505 ins_cost(150); 2506 ins_encode %{ 2507 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2508 %} 2509 ins_pipe(pipe_slow); 2510 %} 2511 2512 instruct divF_mem(regF dst, memory src) %{ 2513 predicate((UseSSE>=1) && (UseAVX == 0)); 2514 match(Set dst (DivF dst (LoadF src))); 2515 2516 format %{ "divss $dst, $src" %} 2517 ins_cost(150); 2518 ins_encode %{ 2519 __ divss($dst$$XMMRegister, $src$$Address); 2520 %} 2521 ins_pipe(pipe_slow); 2522 %} 2523 2524 instruct divF_imm(regF dst, immF con) %{ 2525 predicate((UseSSE>=1) && (UseAVX == 0)); 2526 match(Set dst (DivF dst con)); 2527 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2528 ins_cost(150); 2529 ins_encode %{ 2530 __ divss($dst$$XMMRegister, $constantaddress($con)); 2531 %} 2532 ins_pipe(pipe_slow); 2533 %} 2534 2535 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2536 predicate(UseAVX > 0); 2537 match(Set dst (DivF src1 src2)); 2538 2539 format %{ "vdivss $dst, $src1, $src2" %} 2540 ins_cost(150); 2541 ins_encode %{ 2542 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2543 %} 2544 ins_pipe(pipe_slow); 2545 %} 2546 2547 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2548 predicate(UseAVX > 0); 2549 match(Set dst (DivF src1 (LoadF src2))); 2550 2551 format %{ "vdivss $dst, $src1, $src2" %} 2552 ins_cost(150); 2553 ins_encode %{ 2554 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2555 %} 2556 ins_pipe(pipe_slow); 2557 %} 2558 2559 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2560 predicate(UseAVX > 0); 2561 match(Set dst (DivF src con)); 2562 2563 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2564 ins_cost(150); 2565 ins_encode %{ 2566 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2567 %} 2568 ins_pipe(pipe_slow); 2569 %} 2570 2571 instruct divD_reg(regD dst, regD src) %{ 2572 predicate((UseSSE>=2) && (UseAVX == 0)); 2573 match(Set dst (DivD dst src)); 2574 2575 format %{ "divsd $dst, $src" %} 2576 ins_cost(150); 2577 ins_encode %{ 2578 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2579 %} 2580 ins_pipe(pipe_slow); 2581 %} 2582 2583 instruct divD_mem(regD dst, memory src) %{ 2584 predicate((UseSSE>=2) && (UseAVX == 0)); 2585 match(Set dst (DivD dst (LoadD src))); 2586 2587 format %{ "divsd $dst, $src" %} 2588 ins_cost(150); 2589 ins_encode %{ 2590 __ divsd($dst$$XMMRegister, $src$$Address); 2591 %} 2592 ins_pipe(pipe_slow); 2593 %} 2594 2595 instruct divD_imm(regD dst, immD con) %{ 2596 predicate((UseSSE>=2) && (UseAVX == 0)); 2597 match(Set dst (DivD dst con)); 2598 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2599 ins_cost(150); 2600 ins_encode %{ 2601 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2602 %} 2603 ins_pipe(pipe_slow); 2604 %} 2605 2606 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2607 predicate(UseAVX > 0); 2608 match(Set dst (DivD src1 src2)); 2609 2610 format %{ "vdivsd $dst, $src1, $src2" %} 2611 ins_cost(150); 2612 ins_encode %{ 2613 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2614 %} 2615 ins_pipe(pipe_slow); 2616 %} 
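
// The _mem and _imm variants fold the memory access into the arithmetic
// instruction itself: divD_reg_mem below matches (DivD src1 (LoadD src2))
// and emits "vdivsd dst, src1, [mem]" directly, and the _imm forms read
// their constant operand from the constant table via $constantaddress, so
// neither needs a separate load into a temporary register first.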
2617 2618 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2619 predicate(UseAVX > 0); 2620 match(Set dst (DivD src1 (LoadD src2))); 2621 2622 format %{ "vdivsd $dst, $src1, $src2" %} 2623 ins_cost(150); 2624 ins_encode %{ 2625 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2626 %} 2627 ins_pipe(pipe_slow); 2628 %} 2629 2630 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2631 predicate(UseAVX > 0); 2632 match(Set dst (DivD src con)); 2633 2634 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2635 ins_cost(150); 2636 ins_encode %{ 2637 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2638 %} 2639 ins_pipe(pipe_slow); 2640 %} 2641 2642 instruct absF_reg(regF dst) %{ 2643 predicate((UseSSE>=1) && (UseAVX == 0)); 2644 match(Set dst (AbsF dst)); 2645 ins_cost(150); 2646 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2647 ins_encode %{ 2648 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2649 %} 2650 ins_pipe(pipe_slow); 2651 %} 2652 2653 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2654 predicate(UseAVX > 0); 2655 match(Set dst (AbsF src)); 2656 ins_cost(150); 2657 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2658 ins_encode %{ 2659 int vector_len = 0; 2660 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2661 ExternalAddress(float_signmask()), vector_len); 2662 %} 2663 ins_pipe(pipe_slow); 2664 %} 2665 2666 instruct absD_reg(regD dst) %{ 2667 predicate((UseSSE>=2) && (UseAVX == 0)); 2668 match(Set dst (AbsD dst)); 2669 ins_cost(150); 2670 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2671 "# abs double by sign masking" %} 2672 ins_encode %{ 2673 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2674 %} 2675 ins_pipe(pipe_slow); 2676 %} 2677 2678 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2679 predicate(UseAVX > 0); 2680 match(Set dst (AbsD src)); 2681 ins_cost(150); 2682 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2683 "# abs double by sign masking" %} 2684 ins_encode %{ 2685 int vector_len = 0; 2686 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2687 ExternalAddress(double_signmask()), vector_len); 2688 %} 2689 ins_pipe(pipe_slow); 2690 %} 2691 2692 instruct negF_reg(regF dst) %{ 2693 predicate((UseSSE>=1) && (UseAVX == 0)); 2694 match(Set dst (NegF dst)); 2695 ins_cost(150); 2696 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2697 ins_encode %{ 2698 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2699 %} 2700 ins_pipe(pipe_slow); 2701 %} 2702 2703 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2704 predicate(UseAVX > 0); 2705 match(Set dst (NegF src)); 2706 ins_cost(150); 2707 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2708 ins_encode %{ 2709 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2710 ExternalAddress(float_signflip())); 2711 %} 2712 ins_pipe(pipe_slow); 2713 %} 2714 2715 instruct negD_reg(regD dst) %{ 2716 predicate((UseSSE>=2) && (UseAVX == 0)); 2717 match(Set dst (NegD dst)); 2718 ins_cost(150); 2719 format %{ "xorpd $dst, [0x8000000000000000]\t" 2720 "# neg double by sign flipping" %} 2721 ins_encode %{ 2722 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2723 %} 2724 ins_pipe(pipe_slow); 2725 %} 2726 2727 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 2728 predicate(UseAVX > 0); 2729 match(Set dst (NegD src)); 2730 ins_cost(150); 2731 format %{ "vnegatesd $dst, $src, 
[0x8000000000000000]\t" 2732 "# neg double by sign flipping" %} 2733 ins_encode %{ 2734 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2735 ExternalAddress(double_signflip())); 2736 %} 2737 ins_pipe(pipe_slow); 2738 %} 2739 2740 instruct sqrtF_reg(regF dst, regF src) %{ 2741 predicate(UseSSE>=1); 2742 match(Set dst (SqrtF src)); 2743 2744 format %{ "sqrtss $dst, $src" %} 2745 ins_cost(150); 2746 ins_encode %{ 2747 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2748 %} 2749 ins_pipe(pipe_slow); 2750 %} 2751 2752 instruct sqrtF_mem(regF dst, memory src) %{ 2753 predicate(UseSSE>=1); 2754 match(Set dst (SqrtF (LoadF src))); 2755 2756 format %{ "sqrtss $dst, $src" %} 2757 ins_cost(150); 2758 ins_encode %{ 2759 __ sqrtss($dst$$XMMRegister, $src$$Address); 2760 %} 2761 ins_pipe(pipe_slow); 2762 %} 2763 2764 instruct sqrtF_imm(regF dst, immF con) %{ 2765 predicate(UseSSE>=1); 2766 match(Set dst (SqrtF con)); 2767 2768 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2769 ins_cost(150); 2770 ins_encode %{ 2771 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2772 %} 2773 ins_pipe(pipe_slow); 2774 %} 2775 2776 instruct sqrtD_reg(regD dst, regD src) %{ 2777 predicate(UseSSE>=2); 2778 match(Set dst (SqrtD src)); 2779 2780 format %{ "sqrtsd $dst, $src" %} 2781 ins_cost(150); 2782 ins_encode %{ 2783 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2784 %} 2785 ins_pipe(pipe_slow); 2786 %} 2787 2788 instruct sqrtD_mem(regD dst, memory src) %{ 2789 predicate(UseSSE>=2); 2790 match(Set dst (SqrtD (LoadD src))); 2791 2792 format %{ "sqrtsd $dst, $src" %} 2793 ins_cost(150); 2794 ins_encode %{ 2795 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2796 %} 2797 ins_pipe(pipe_slow); 2798 %} 2799 2800 instruct sqrtD_imm(regD dst, immD con) %{ 2801 predicate(UseSSE>=2); 2802 match(Set dst (SqrtD con)); 2803 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2804 ins_cost(150); 2805 ins_encode %{ 2806 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2807 %} 2808 ins_pipe(pipe_slow); 2809 %} 2810 2811 instruct onspinwait() %{ 2812 match(OnSpinWait); 2813 ins_cost(200); 2814 2815 format %{ 2816 $$template 2817 $$emit$$"pause\t! membar_onspinwait" 2818 %} 2819 ins_encode %{ 2820 __ pause(); 2821 %} 2822 ins_pipe(pipe_slow); 2823 %} 2824 2825 // a * b + c 2826 instruct fmaD_reg(regD a, regD b, regD c) %{ 2827 predicate(UseFMA); 2828 match(Set c (FmaD c (Binary a b))); 2829 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2830 ins_cost(150); 2831 ins_encode %{ 2832 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2833 %} 2834 ins_pipe( pipe_slow ); 2835 %} 2836 2837 // a * b + c 2838 instruct fmaF_reg(regF a, regF b, regF c) %{ 2839 predicate(UseFMA); 2840 match(Set c (FmaF c (Binary a b))); 2841 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2842 ins_cost(150); 2843 ins_encode %{ 2844 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2845 %} 2846 ins_pipe( pipe_slow ); 2847 %} 2848 2849 // The following pseudo code describes the algorithm for max[FD]; 2850 // the min algorithm is along the same lines: 2851 // btmp = (b < 0) ? a : b 2852 // atmp = (b < 0) ? b : a 2853 // Tmp = Max_Float(atmp, btmp) 2854 // Res = isNaN(atmp) ?
atmp : Tmp 2855 instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{ 2856 predicate(UseAVX > 0); 2857 match(Set dst (MaxF a b)); 2858 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 2859 format %{ 2860 "blendvps $btmp,$b,$a,$b \n\t" 2861 "blendvps $atmp,$a,$b,$b \n\t" 2862 "vmaxps $tmp,$atmp,$btmp \n\t" 2863 "cmpps.unordered $btmp, $atmp, $atmp \n\t" 2864 "blendvps $dst,$tmp,$atmp,$btmp \n\t" 2865 %} 2866 ins_encode %{ 2867 int vector_len = 0; 2868 __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); 2869 __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); 2870 __ vmaxps($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); 2871 __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len); 2872 __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); 2873 %} 2874 ins_pipe( pipe_slow ); 2875 %} 2876 2877 2878 // max = java.lang.Math.max(double a, double b) 2879 instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{ 2880 predicate(UseAVX > 0); 2881 match(Set dst (MaxD a b)); 2882 effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp); 2883 format %{ 2884 "blendvpd $btmp,$b,$a,$b \n\t" 2885 "blendvpd $atmp,$a,$b,$b \n\t" 2886 "vmaxpd $tmp,$atmp,$btmp \n\t" 2887 "cmppd.unordered $btmp, $atmp, $atmp \n\t" 2888 "blendvpd $dst,$tmp,$atmp,$btmp \n\t" 2889 %} 2890 ins_encode %{ 2891 int vector_len = 0; 2892 __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len); 2893 __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len); 2894 __ vmaxpd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); 2895 __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len); 2896 __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); 2897 %} 2898 ins_pipe( pipe_slow ); 2899 %}
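// A scalar C++ sketch of what the blend/max/blend sequence above computes
// (illustrative only; this helper is not part of the generated code):
//
//   float max_float(float a, float b) {
//     float btmp = std::signbit(b) ? a : b;     // blendvps keys on the sign bit of b
//     float atmp = std::signbit(b) ? b : a;
//     float tmp  = (atmp > btmp) ? atmp : btmp; // vmaxps returns its 2nd source on NaN/equal
//     return std::isnan(atmp) ? atmp : tmp;     // cmpps.unordered + final blendvps
//   }
//
// Ordering the operands by the sign bit of b is what makes the non-commutative
// vmax* produce +0.0 for max(-0.0, +0.0), and the final blend forwards a NaN
// operand unchanged, matching java.lang.Math.max semantics.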
2933 "vminpd $tmp,$atmp,$btmp \n\t" 2934 "cmppd.unordered $btmp, $atmp, $atmp \n\t" 2935 "blendvpd $dst,$tmp,$atmp,$btmp \n\t" 2936 %} 2937 ins_encode %{ 2938 int vector_len = 0; 2939 __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len); 2940 __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len); 2941 __ vminpd($tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vector_len); 2942 __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, 0x3, vector_len); 2943 __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len); 2944 %} 2945 ins_pipe( pipe_slow ); 2946 %} 2947 2948 // ====================VECTOR INSTRUCTIONS===================================== 2949 2950 2951 // Load vectors (4 bytes long) 2952 instruct loadV4(vecS dst, memory mem) %{ 2953 predicate(n->as_LoadVector()->memory_size() == 4); 2954 match(Set dst (LoadVector mem)); 2955 ins_cost(125); 2956 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2957 ins_encode %{ 2958 __ movdl($dst$$XMMRegister, $mem$$Address); 2959 %} 2960 ins_pipe( pipe_slow ); 2961 %} 2962 2963 // Load vectors (4 bytes long) 2964 instruct MoveVecS2Leg(legVecS dst, vecS src) %{ 2965 match(Set dst src); 2966 format %{ "movss $dst,$src\t! load vector (4 bytes)" %} 2967 ins_encode %{ 2968 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 2969 %} 2970 ins_pipe( fpu_reg_reg ); 2971 %} 2972 2973 // Load vectors (4 bytes long) 2974 instruct MoveLeg2VecS(vecS dst, legVecS src) %{ 2975 match(Set dst src); 2976 format %{ "movss $dst,$src\t! load vector (4 bytes)" %} 2977 ins_encode %{ 2978 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 2979 %} 2980 ins_pipe( fpu_reg_reg ); 2981 %} 2982 2983 // Load vectors (8 bytes long) 2984 instruct loadV8(vecD dst, memory mem) %{ 2985 predicate(n->as_LoadVector()->memory_size() == 8); 2986 match(Set dst (LoadVector mem)); 2987 ins_cost(125); 2988 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2989 ins_encode %{ 2990 __ movq($dst$$XMMRegister, $mem$$Address); 2991 %} 2992 ins_pipe( pipe_slow ); 2993 %} 2994 2995 // Load vectors (8 bytes long) 2996 instruct MoveVecD2Leg(legVecD dst, vecD src) %{ 2997 match(Set dst src); 2998 format %{ "movsd $dst,$src\t! load vector (8 bytes)" %} 2999 ins_encode %{ 3000 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 3001 %} 3002 ins_pipe( fpu_reg_reg ); 3003 %} 3004 3005 // Load vectors (8 bytes long) 3006 instruct MoveLeg2VecD(vecD dst, legVecD src) %{ 3007 match(Set dst src); 3008 format %{ "movsd $dst,$src\t! load vector (8 bytes)" %} 3009 ins_encode %{ 3010 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 3011 %} 3012 ins_pipe( fpu_reg_reg ); 3013 %} 3014 3015 // Load vectors (16 bytes long) 3016 instruct loadV16(vecX dst, memory mem) %{ 3017 predicate(n->as_LoadVector()->memory_size() == 16); 3018 match(Set dst (LoadVector mem)); 3019 ins_cost(125); 3020 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 3021 ins_encode %{ 3022 __ movdqu($dst$$XMMRegister, $mem$$Address); 3023 %} 3024 ins_pipe( pipe_slow ); 3025 %} 3026 3027 // Load vectors (16 bytes long) 3028 instruct MoveVecX2Leg(legVecX dst, vecX src) %{ 3029 match(Set dst src); 3030 format %{ "movdqu $dst,$src\t! 
3027 // Move vectors (16 bytes long) 3028 instruct MoveVecX2Leg(legVecX dst, vecX src) %{ 3029 match(Set dst src); 3030 format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %} 3031 ins_encode %{ 3032 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 3033 int vector_len = 2; 3034 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3035 } else { 3036 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3037 } 3038 %} 3039 ins_pipe( fpu_reg_reg ); 3040 %} 3041 3042 // Move vectors (16 bytes long) 3043 instruct MoveLeg2VecX(vecX dst, legVecX src) %{ 3044 match(Set dst src); 3045 format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %} 3046 ins_encode %{ 3047 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 3048 int vector_len = 2; 3049 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3050 } else { 3051 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3052 } 3053 %} 3054 ins_pipe( fpu_reg_reg ); 3055 %} 3056 3057 // Load vectors (32 bytes long) 3058 instruct loadV32(vecY dst, memory mem) %{ 3059 predicate(n->as_LoadVector()->memory_size() == 32); 3060 match(Set dst (LoadVector mem)); 3061 ins_cost(125); 3062 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 3063 ins_encode %{ 3064 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3065 %} 3066 ins_pipe( pipe_slow ); 3067 %} 3068 3069 // Move vectors (32 bytes long) 3070 instruct MoveVecY2Leg(legVecY dst, vecY src) %{ 3071 match(Set dst src); 3072 format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %} 3073 ins_encode %{ 3074 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 3075 int vector_len = 2; 3076 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3077 } else { 3078 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3079 } 3080 %} 3081 ins_pipe( fpu_reg_reg ); 3082 %} 3083 3084 // Move vectors (32 bytes long) 3085 instruct MoveLeg2VecY(vecY dst, legVecY src) %{ 3086 match(Set dst src); 3087 format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %} 3088 ins_encode %{ 3089 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 3090 int vector_len = 2; 3091 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3092 } else { 3093 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3094 } 3095 %} 3096 ins_pipe( fpu_reg_reg ); 3097 %} 3098 3099 // Load vectors (64 bytes long) 3100 instruct loadV64_dword(vecZ dst, memory mem) %{ 3101 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 3102 match(Set dst (LoadVector mem)); 3103 ins_cost(125); 3104 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 3105 ins_encode %{ 3106 int vector_len = 2; 3107 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3108 %} 3109 ins_pipe( pipe_slow ); 3110 %} 3111 3112 // Load vectors (64 bytes long) 3113 instruct loadV64_qword(vecZ dst, memory mem) %{ 3114 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 3115 match(Set dst (LoadVector mem)); 3116 ins_cost(125); 3117 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 3118 ins_encode %{ 3119 int vector_len = 2; 3120 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 3121 %} 3122 ins_pipe( pipe_slow ); 3123 %} 3124 3125 instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{ 3126 match(Set dst src); 3127 format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %} 3128 ins_encode %{ 3129 int vector_len = 2; 3130 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3131 %} 3132 ins_pipe( fpu_reg_reg ); 3133 %} 3134 3135 instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{ 3136 match(Set dst src); 3137 format %{ "vmovdquq $dst k0,$src\t!
Move vector (64 bytes)" %} 3138 ins_encode %{ 3139 int vector_len = 2; 3140 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 3141 %} 3142 ins_pipe( fpu_reg_reg ); 3143 %} 3144 3145 // Store vectors 3146 instruct storeV4(memory mem, vecS src) %{ 3147 predicate(n->as_StoreVector()->memory_size() == 4); 3148 match(Set mem (StoreVector mem src)); 3149 ins_cost(145); 3150 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 3151 ins_encode %{ 3152 __ movdl($mem$$Address, $src$$XMMRegister); 3153 %} 3154 ins_pipe( pipe_slow ); 3155 %} 3156 3157 instruct storeV8(memory mem, vecD src) %{ 3158 predicate(n->as_StoreVector()->memory_size() == 8); 3159 match(Set mem (StoreVector mem src)); 3160 ins_cost(145); 3161 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 3162 ins_encode %{ 3163 __ movq($mem$$Address, $src$$XMMRegister); 3164 %} 3165 ins_pipe( pipe_slow ); 3166 %} 3167 3168 instruct storeV16(memory mem, vecX src) %{ 3169 predicate(n->as_StoreVector()->memory_size() == 16); 3170 match(Set mem (StoreVector mem src)); 3171 ins_cost(145); 3172 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3173 ins_encode %{ 3174 __ movdqu($mem$$Address, $src$$XMMRegister); 3175 %} 3176 ins_pipe( pipe_slow ); 3177 %} 3178 3179 instruct storeV32(memory mem, vecY src) %{ 3180 predicate(n->as_StoreVector()->memory_size() == 32); 3181 match(Set mem (StoreVector mem src)); 3182 ins_cost(145); 3183 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 3184 ins_encode %{ 3185 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3186 %} 3187 ins_pipe( pipe_slow ); 3188 %} 3189 3190 instruct storeV64_dword(memory mem, vecZ src) %{ 3191 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 3192 match(Set mem (StoreVector mem src)); 3193 ins_cost(145); 3194 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 3195 ins_encode %{ 3196 int vector_len = 2; 3197 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3198 %} 3199 ins_pipe( pipe_slow ); 3200 %} 3201 3202 instruct storeV64_qword(memory mem, vecZ src) %{ 3203 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 3204 match(Set mem (StoreVector mem src)); 3205 ins_cost(145); 3206 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 3207 ins_encode %{ 3208 int vector_len = 2; 3209 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 3210 %} 3211 ins_pipe( pipe_slow ); 3212 %} 3213 3214 // ====================LEGACY REPLICATE======================================= 3215 3216 instruct Repl4B_mem(vecS dst, memory mem) %{ 3217 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3218 match(Set dst (ReplicateB (LoadB mem))); 3219 format %{ "punpcklbw $dst,$mem\n\t" 3220 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3221 ins_encode %{ 3222 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3223 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3224 %} 3225 ins_pipe( pipe_slow ); 3226 %} 3227 3228 instruct Repl8B_mem(vecD dst, memory mem) %{ 3229 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3230 match(Set dst (ReplicateB (LoadB mem))); 3231 format %{ "punpcklbw $dst,$mem\n\t" 3232 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3233 ins_encode %{ 3234 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3235 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3236 %} 3237 ins_pipe( pipe_slow ); 3238 %} 3239 3240 instruct Repl16B(vecX dst, rRegI src) %{ 3241 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3242 match(Set dst (ReplicateB src)); 3243 format %{ "movd $dst,$src\n\t" 3244 "punpcklbw $dst,$dst\n\t" 3245 "pshuflw $dst,$dst,0x00\n\t" 3246 "punpcklqdq $dst,$dst\t! replicate16B" %} 3247 ins_encode %{ 3248 __ movdl($dst$$XMMRegister, $src$$Register); 3249 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3250 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3251 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3252 %} 3253 ins_pipe( pipe_slow ); 3254 %} 3255 3256 instruct Repl16B_mem(vecX dst, memory mem) %{ 3257 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3258 match(Set dst (ReplicateB (LoadB mem))); 3259 format %{ "punpcklbw $dst,$mem\n\t" 3260 "pshuflw $dst,$dst,0x00\n\t" 3261 "punpcklqdq $dst,$dst\t! replicate16B" %} 3262 ins_encode %{ 3263 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3264 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3265 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3266 %} 3267 ins_pipe( pipe_slow ); 3268 %} 3269 3270 instruct Repl32B(vecY dst, rRegI src) %{ 3271 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3272 match(Set dst (ReplicateB src)); 3273 format %{ "movd $dst,$src\n\t" 3274 "punpcklbw $dst,$dst\n\t" 3275 "pshuflw $dst,$dst,0x00\n\t" 3276 "punpcklqdq $dst,$dst\n\t" 3277 "vinserti128_high $dst,$dst\t! replicate32B" %} 3278 ins_encode %{ 3279 __ movdl($dst$$XMMRegister, $src$$Register); 3280 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3281 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3282 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3283 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3284 %} 3285 ins_pipe( pipe_slow ); 3286 %} 3287 3288 instruct Repl32B_mem(vecY dst, memory mem) %{ 3289 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3290 match(Set dst (ReplicateB (LoadB mem))); 3291 format %{ "punpcklbw $dst,$mem\n\t" 3292 "pshuflw $dst,$dst,0x00\n\t" 3293 "punpcklqdq $dst,$dst\n\t" 3294 "vinserti128_high $dst,$dst\t! replicate32B" %} 3295 ins_encode %{ 3296 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3297 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3298 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3299 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3300 %} 3301 ins_pipe( pipe_slow ); 3302 %} 3303 3304 instruct Repl64B(legVecZ dst, rRegI src) %{ 3305 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3306 match(Set dst (ReplicateB src)); 3307 format %{ "movd $dst,$src\n\t" 3308 "punpcklbw $dst,$dst\n\t" 3309 "pshuflw $dst,$dst,0x00\n\t" 3310 "punpcklqdq $dst,$dst\n\t" 3311 "vinserti128_high $dst,$dst\t" 3312 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate64B" %} 3313 ins_encode %{ 3314 __ movdl($dst$$XMMRegister, $src$$Register); 3315 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3316 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3317 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3318 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3319 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3320 %} 3321 ins_pipe( pipe_slow ); 3322 %} 3323 3324 instruct Repl64B_mem(legVecZ dst, memory mem) %{ 3325 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3326 match(Set dst (ReplicateB (LoadB mem))); 3327 format %{ "punpcklbw $dst,$mem\n\t" 3328 "pshuflw $dst,$dst,0x00\n\t" 3329 "punpcklqdq $dst,$dst\n\t" 3330 "vinserti128_high $dst,$dst\t" 3331 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} 3332 ins_encode %{ 3333 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3334 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3335 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3336 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3337 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3338 %} 3339 ins_pipe( pipe_slow ); 3340 %} 3341 3342 instruct Repl16B_imm(vecX dst, immI con) %{ 3343 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3344 match(Set dst (ReplicateB con)); 3345 format %{ "movq $dst,[$constantaddress]\n\t" 3346 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3347 ins_encode %{ 3348 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3349 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3350 %} 3351 ins_pipe( pipe_slow ); 3352 %} 3353 3354 instruct Repl32B_imm(vecY dst, immI con) %{ 3355 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3356 match(Set dst (ReplicateB con)); 3357 format %{ "movq $dst,[$constantaddress]\n\t" 3358 "punpcklqdq $dst,$dst\n\t" 3359 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3360 ins_encode %{ 3361 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3362 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3363 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3364 %} 3365 ins_pipe( pipe_slow ); 3366 %} 3367 3368 instruct Repl64B_imm(legVecZ dst, immI con) %{ 3369 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3370 match(Set dst (ReplicateB con)); 3371 format %{ "movq $dst,[$constantaddress]\n\t" 3372 "punpcklqdq $dst,$dst\n\t" 3373 "vinserti128_high $dst,$dst\t" 3374 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %} 3375 ins_encode %{ 3376 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3377 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3378 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3379 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3380 %} 3381 ins_pipe( pipe_slow ); 3382 %} 3383 3384 instruct Repl4S(vecD dst, rRegI src) %{ 3385 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3386 match(Set dst (ReplicateS src)); 3387 format %{ "movd $dst,$src\n\t" 3388 "pshuflw $dst,$dst,0x00\t! 
replicate4S" %} 3389 ins_encode %{ 3390 __ movdl($dst$$XMMRegister, $src$$Register); 3391 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3392 %} 3393 ins_pipe( pipe_slow ); 3394 %} 3395 3396 instruct Repl4S_mem(vecD dst, memory mem) %{ 3397 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3398 match(Set dst (ReplicateS (LoadS mem))); 3399 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3400 ins_encode %{ 3401 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3402 %} 3403 ins_pipe( pipe_slow ); 3404 %} 3405 3406 instruct Repl8S(vecX dst, rRegI src) %{ 3407 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3408 match(Set dst (ReplicateS src)); 3409 format %{ "movd $dst,$src\n\t" 3410 "pshuflw $dst,$dst,0x00\n\t" 3411 "punpcklqdq $dst,$dst\t! replicate8S" %} 3412 ins_encode %{ 3413 __ movdl($dst$$XMMRegister, $src$$Register); 3414 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3415 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3416 %} 3417 ins_pipe( pipe_slow ); 3418 %} 3419 3420 instruct Repl8S_mem(vecX dst, memory mem) %{ 3421 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3422 match(Set dst (ReplicateS (LoadS mem))); 3423 format %{ "pshuflw $dst,$mem,0x00\n\t" 3424 "punpcklqdq $dst,$dst\t! replicate8S" %} 3425 ins_encode %{ 3426 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3427 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3428 %} 3429 ins_pipe( pipe_slow ); 3430 %} 3431 3432 instruct Repl8S_imm(vecX dst, immI con) %{ 3433 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3434 match(Set dst (ReplicateS con)); 3435 format %{ "movq $dst,[$constantaddress]\n\t" 3436 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3437 ins_encode %{ 3438 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3439 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3440 %} 3441 ins_pipe( pipe_slow ); 3442 %} 3443 3444 instruct Repl16S(vecY dst, rRegI src) %{ 3445 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3446 match(Set dst (ReplicateS src)); 3447 format %{ "movd $dst,$src\n\t" 3448 "pshuflw $dst,$dst,0x00\n\t" 3449 "punpcklqdq $dst,$dst\n\t" 3450 "vinserti128_high $dst,$dst\t! replicate16S" %} 3451 ins_encode %{ 3452 __ movdl($dst$$XMMRegister, $src$$Register); 3453 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3454 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3455 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3456 %} 3457 ins_pipe( pipe_slow ); 3458 %} 3459 3460 instruct Repl16S_mem(vecY dst, memory mem) %{ 3461 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3462 match(Set dst (ReplicateS (LoadS mem))); 3463 format %{ "pshuflw $dst,$mem,0x00\n\t" 3464 "punpcklqdq $dst,$dst\n\t" 3465 "vinserti128_high $dst,$dst\t! replicate16S" %} 3466 ins_encode %{ 3467 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3468 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3469 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3470 %} 3471 ins_pipe( pipe_slow ); 3472 %} 3473 3474 instruct Repl16S_imm(vecY dst, immI con) %{ 3475 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3476 match(Set dst (ReplicateS con)); 3477 format %{ "movq $dst,[$constantaddress]\n\t" 3478 "punpcklqdq $dst,$dst\n\t" 3479 "vinserti128_high $dst,$dst\t! 
replicate16S($con)" %} 3480 ins_encode %{ 3481 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3482 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3483 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3484 %} 3485 ins_pipe( pipe_slow ); 3486 %} 3487 3488 instruct Repl32S(legVecZ dst, rRegI src) %{ 3489 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3490 match(Set dst (ReplicateS src)); 3491 format %{ "movd $dst,$src\n\t" 3492 "pshuflw $dst,$dst,0x00\n\t" 3493 "punpcklqdq $dst,$dst\n\t" 3494 "vinserti128_high $dst,$dst\t" 3495 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3496 ins_encode %{ 3497 __ movdl($dst$$XMMRegister, $src$$Register); 3498 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3499 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3500 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3501 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3502 %} 3503 ins_pipe( pipe_slow ); 3504 %} 3505 3506 instruct Repl32S_mem(legVecZ dst, memory mem) %{ 3507 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3508 match(Set dst (ReplicateS (LoadS mem))); 3509 format %{ "pshuflw $dst,$mem,0x00\n\t" 3510 "punpcklqdq $dst,$dst\n\t" 3511 "vinserti128_high $dst,$dst\t" 3512 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3513 ins_encode %{ 3514 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3515 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3516 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3517 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3518 %} 3519 ins_pipe( pipe_slow ); 3520 %} 3521 3522 instruct Repl32S_imm(legVecZ dst, immI con) %{ 3523 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3524 match(Set dst (ReplicateS con)); 3525 format %{ "movq $dst,[$constantaddress]\n\t" 3526 "punpcklqdq $dst,$dst\n\t" 3527 "vinserti128_high $dst,$dst\t" 3528 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} 3529 ins_encode %{ 3530 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3531 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3532 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3533 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3534 %} 3535 ins_pipe( pipe_slow ); 3536 %} 3537 3538 instruct Repl4I(vecX dst, rRegI src) %{ 3539 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3540 match(Set dst (ReplicateI src)); 3541 format %{ "movd $dst,$src\n\t" 3542 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3543 ins_encode %{ 3544 __ movdl($dst$$XMMRegister, $src$$Register); 3545 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3546 %} 3547 ins_pipe( pipe_slow ); 3548 %} 3549 3550 instruct Repl4I_mem(vecX dst, memory mem) %{ 3551 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3552 match(Set dst (ReplicateI (LoadI mem))); 3553 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3554 ins_encode %{ 3555 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3556 %} 3557 ins_pipe( pipe_slow ); 3558 %} 3559 3560 instruct Repl8I(vecY dst, rRegI src) %{ 3561 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3562 match(Set dst (ReplicateI src)); 3563 format %{ "movd $dst,$src\n\t" 3564 "pshufd $dst,$dst,0x00\n\t" 3565 "vinserti128_high $dst,$dst\t! 
replicate8I" %} 3566 ins_encode %{ 3567 __ movdl($dst$$XMMRegister, $src$$Register); 3568 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3569 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3570 %} 3571 ins_pipe( pipe_slow ); 3572 %} 3573 3574 instruct Repl8I_mem(vecY dst, memory mem) %{ 3575 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3576 match(Set dst (ReplicateI (LoadI mem))); 3577 format %{ "pshufd $dst,$mem,0x00\n\t" 3578 "vinserti128_high $dst,$dst\t! replicate8I" %} 3579 ins_encode %{ 3580 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3581 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3582 %} 3583 ins_pipe( pipe_slow ); 3584 %} 3585 3586 instruct Repl16I(legVecZ dst, rRegI src) %{ 3587 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3588 match(Set dst (ReplicateI src)); 3589 format %{ "movd $dst,$src\n\t" 3590 "pshufd $dst,$dst,0x00\n\t" 3591 "vinserti128_high $dst,$dst\t" 3592 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3593 ins_encode %{ 3594 __ movdl($dst$$XMMRegister, $src$$Register); 3595 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3596 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3597 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3598 %} 3599 ins_pipe( pipe_slow ); 3600 %} 3601 3602 instruct Repl16I_mem(legVecZ dst, memory mem) %{ 3603 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3604 match(Set dst (ReplicateI (LoadI mem))); 3605 format %{ "pshufd $dst,$mem,0x00\n\t" 3606 "vinserti128_high $dst,$dst\t" 3607 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3608 ins_encode %{ 3609 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3610 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3611 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3612 %} 3613 ins_pipe( pipe_slow ); 3614 %} 3615 3616 instruct Repl4I_imm(vecX dst, immI con) %{ 3617 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3618 match(Set dst (ReplicateI con)); 3619 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3620 "punpcklqdq $dst,$dst" %} 3621 ins_encode %{ 3622 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3623 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3624 %} 3625 ins_pipe( pipe_slow ); 3626 %} 3627 3628 instruct Repl8I_imm(vecY dst, immI con) %{ 3629 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3630 match(Set dst (ReplicateI con)); 3631 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3632 "punpcklqdq $dst,$dst\n\t" 3633 "vinserti128_high $dst,$dst" %} 3634 ins_encode %{ 3635 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3636 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3637 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3638 %} 3639 ins_pipe( pipe_slow ); 3640 %} 3641 3642 instruct Repl16I_imm(legVecZ dst, immI con) %{ 3643 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3644 match(Set dst (ReplicateI con)); 3645 format %{ "movq $dst,[$constantaddress]\t" 3646 "punpcklqdq $dst,$dst\n\t" 3647 "vinserti128_high $dst,$dst" 3648 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
3642 instruct Repl16I_imm(legVecZ dst, immI con) %{ 3643 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3644 match(Set dst (ReplicateI con)); 3645 format %{ "movq $dst,[$constantaddress]\n\t" 3646 "punpcklqdq $dst,$dst\n\t" 3647 "vinserti128_high $dst,$dst\t" 3648 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %} 3649 ins_encode %{ 3650 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3651 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3652 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3653 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3654 %} 3655 ins_pipe( pipe_slow ); 3656 %} 3657 3658 // A long can be loaded into an XMM register directly from memory. 3659 instruct Repl2L_mem(vecX dst, memory mem) %{ 3660 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3661 match(Set dst (ReplicateL (LoadL mem))); 3662 format %{ "movq $dst,$mem\n\t" 3663 "punpcklqdq $dst,$dst\t! replicate2L" %} 3664 ins_encode %{ 3665 __ movq($dst$$XMMRegister, $mem$$Address); 3666 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3667 %} 3668 ins_pipe( pipe_slow ); 3669 %} 3670 3671 // Replicate long (8 byte) scalar to be vector 3672 #ifdef _LP64 3673 instruct Repl4L(vecY dst, rRegL src) %{ 3674 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3675 match(Set dst (ReplicateL src)); 3676 format %{ "movdq $dst,$src\n\t" 3677 "punpcklqdq $dst,$dst\n\t" 3678 "vinserti128_high $dst,$dst\t! replicate4L" %} 3679 ins_encode %{ 3680 __ movdq($dst$$XMMRegister, $src$$Register); 3681 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3682 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3683 %} 3684 ins_pipe( pipe_slow ); 3685 %} 3686 3687 instruct Repl8L(legVecZ dst, rRegL src) %{ 3688 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3689 match(Set dst (ReplicateL src)); 3690 format %{ "movdq $dst,$src\n\t" 3691 "punpcklqdq $dst,$dst\n\t" 3692 "vinserti128_high $dst,$dst\t" 3693 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3694 ins_encode %{ 3695 __ movdq($dst$$XMMRegister, $src$$Register); 3696 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3697 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3698 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3699 %} 3700 ins_pipe( pipe_slow ); 3701 %} 3702 #else // _LP64 3703 instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{ 3704 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3705 match(Set dst (ReplicateL src)); 3706 effect(TEMP dst, USE src, TEMP tmp); 3707 format %{ "movdl $dst,$src.lo\n\t" 3708 "movdl $tmp,$src.hi\n\t" 3709 "punpckldq $dst,$tmp\n\t" 3710 "punpcklqdq $dst,$dst\n\t" 3711 "vinserti128_high $dst,$dst\t! replicate4L" %} 3712 ins_encode %{ 3713 __ movdl($dst$$XMMRegister, $src$$Register); 3714 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3715 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3716 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3717 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3718 %} 3719 ins_pipe( pipe_slow ); 3720 %} 3721 3722 instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{ 3723 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3724 match(Set dst (ReplicateL src)); 3725 effect(TEMP dst, USE src, TEMP tmp); 3726 format %{ "movdl $dst,$src.lo\n\t" 3727 "movdl $tmp,$src.hi\n\t" 3728 "punpckldq $dst,$tmp\n\t" 3729 "punpcklqdq $dst,$dst\n\t" 3730 "vinserti128_high $dst,$dst\t" 3731 "vinserti64x4 $dst,$dst,$dst,0x1\t!
replicate8L" %} 3732 ins_encode %{ 3733 __ movdl($dst$$XMMRegister, $src$$Register); 3734 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3735 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3736 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3737 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3738 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3739 %} 3740 ins_pipe( pipe_slow ); 3741 %} 3742 #endif // _LP64 3743 3744 instruct Repl4L_imm(vecY dst, immL con) %{ 3745 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3746 match(Set dst (ReplicateL con)); 3747 format %{ "movq $dst,[$constantaddress]\n\t" 3748 "punpcklqdq $dst,$dst\n\t" 3749 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3750 ins_encode %{ 3751 __ movq($dst$$XMMRegister, $constantaddress($con)); 3752 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3753 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3754 %} 3755 ins_pipe( pipe_slow ); 3756 %} 3757 3758 instruct Repl8L_imm(legVecZ dst, immL con) %{ 3759 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3760 match(Set dst (ReplicateL con)); 3761 format %{ "movq $dst,[$constantaddress]\n\t" 3762 "punpcklqdq $dst,$dst\n\t" 3763 "vinserti128_high $dst,$dst\t" 3764 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %} 3765 ins_encode %{ 3766 __ movq($dst$$XMMRegister, $constantaddress($con)); 3767 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3768 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3769 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3770 %} 3771 ins_pipe( pipe_slow ); 3772 %} 3773 3774 instruct Repl4L_mem(vecY dst, memory mem) %{ 3775 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3776 match(Set dst (ReplicateL (LoadL mem))); 3777 format %{ "movq $dst,$mem\n\t" 3778 "punpcklqdq $dst,$dst\n\t" 3779 "vinserti128_high $dst,$dst\t! replicate4L" %} 3780 ins_encode %{ 3781 __ movq($dst$$XMMRegister, $mem$$Address); 3782 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3783 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3784 %} 3785 ins_pipe( pipe_slow ); 3786 %} 3787 3788 instruct Repl8L_mem(legVecZ dst, memory mem) %{ 3789 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3790 match(Set dst (ReplicateL (LoadL mem))); 3791 format %{ "movq $dst,$mem\n\t" 3792 "punpcklqdq $dst,$dst\n\t" 3793 "vinserti128_high $dst,$dst\t" 3794 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3795 ins_encode %{ 3796 __ movq($dst$$XMMRegister, $mem$$Address); 3797 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3798 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3799 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3800 %} 3801 ins_pipe( pipe_slow ); 3802 %} 3803 3804 instruct Repl2F_mem(vecD dst, memory mem) %{ 3805 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3806 match(Set dst (ReplicateF (LoadF mem))); 3807 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3808 ins_encode %{ 3809 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3810 %} 3811 ins_pipe( pipe_slow ); 3812 %} 3813 3814 instruct Repl4F_mem(vecX dst, memory mem) %{ 3815 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3816 match(Set dst (ReplicateF (LoadF mem))); 3817 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4F" %} 3818 ins_encode %{ 3819 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3820 %} 3821 ins_pipe( pipe_slow ); 3822 %} 3823 3824 instruct Repl8F(vecY dst, vlRegF src) %{ 3825 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3826 match(Set dst (ReplicateF src)); 3827 format %{ "pshufd $dst,$src,0x00\n\t" 3828 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3829 ins_encode %{ 3830 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3831 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3832 %} 3833 ins_pipe( pipe_slow ); 3834 %} 3835 3836 instruct Repl8F_mem(vecY dst, memory mem) %{ 3837 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3838 match(Set dst (ReplicateF (LoadF mem))); 3839 format %{ "pshufd $dst,$mem,0x00\n\t" 3840 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3841 ins_encode %{ 3842 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3843 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3844 %} 3845 ins_pipe( pipe_slow ); 3846 %} 3847 3848 instruct Repl16F(legVecZ dst, vlRegF src) %{ 3849 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3850 match(Set dst (ReplicateF src)); 3851 format %{ "pshufd $dst,$src,0x00\n\t" 3852 "vinsertf128_high $dst,$dst\t" 3853 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3854 ins_encode %{ 3855 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3856 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3857 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3858 %} 3859 ins_pipe( pipe_slow ); 3860 %} 3861 3862 instruct Repl16F_mem(legVecZ dst, memory mem) %{ 3863 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3864 match(Set dst (ReplicateF (LoadF mem))); 3865 format %{ "pshufd $dst,$mem,0x00\n\t" 3866 "vinsertf128_high $dst,$dst\t" 3867 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3868 ins_encode %{ 3869 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3870 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3871 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3872 %} 3873 ins_pipe( pipe_slow ); 3874 %} 3875 3876 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3877 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3878 match(Set dst (ReplicateF zero)); 3879 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3880 ins_encode %{ 3881 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3882 %} 3883 ins_pipe( fpu_reg_reg ); 3884 %} 3885 3886 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3887 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3888 match(Set dst (ReplicateF zero)); 3889 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3890 ins_encode %{ 3891 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3892 %} 3893 ins_pipe( fpu_reg_reg ); 3894 %} 3895 3896 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3897 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3898 match(Set dst (ReplicateF zero)); 3899 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3900 ins_encode %{ 3901 int vector_len = 1; 3902 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3903 %} 3904 ins_pipe( fpu_reg_reg ); 3905 %} 3906 3907 instruct Repl2D_mem(vecX dst, memory mem) %{ 3908 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3909 match(Set dst (ReplicateD (LoadD mem))); 3910 format %{ "pshufd $dst,$mem,0x44\t! 
replicate2D" %} 3911 ins_encode %{ 3912 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3913 %} 3914 ins_pipe( pipe_slow ); 3915 %} 3916 3917 instruct Repl4D(vecY dst, vlRegD src) %{ 3918 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3919 match(Set dst (ReplicateD src)); 3920 format %{ "pshufd $dst,$src,0x44\n\t" 3921 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3922 ins_encode %{ 3923 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3924 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3925 %} 3926 ins_pipe( pipe_slow ); 3927 %} 3928 3929 instruct Repl4D_mem(vecY dst, memory mem) %{ 3930 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3931 match(Set dst (ReplicateD (LoadD mem))); 3932 format %{ "pshufd $dst,$mem,0x44\n\t" 3933 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3934 ins_encode %{ 3935 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3936 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3937 %} 3938 ins_pipe( pipe_slow ); 3939 %} 3940 3941 instruct Repl8D(legVecZ dst, vlRegD src) %{ 3942 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3943 match(Set dst (ReplicateD src)); 3944 format %{ "pshufd $dst,$src,0x44\n\t" 3945 "vinsertf128_high $dst,$dst\t" 3946 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3947 ins_encode %{ 3948 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3949 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3950 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3951 %} 3952 ins_pipe( pipe_slow ); 3953 %} 3954 3955 instruct Repl8D_mem(legVecZ dst, memory mem) %{ 3956 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3957 match(Set dst (ReplicateD (LoadD mem))); 3958 format %{ "pshufd $dst,$mem,0x44\n\t" 3959 "vinsertf128_high $dst,$dst\t" 3960 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3961 ins_encode %{ 3962 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3963 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3964 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3965 %} 3966 ins_pipe( pipe_slow ); 3967 %} 3968 3969 // Replicate double (8 byte) scalar zero to be vector 3970 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3971 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3972 match(Set dst (ReplicateD zero)); 3973 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3974 ins_encode %{ 3975 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3976 %} 3977 ins_pipe( fpu_reg_reg ); 3978 %} 3979 3980 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3981 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3982 match(Set dst (ReplicateD zero)); 3983 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3984 ins_encode %{ 3985 int vector_len = 1; 3986 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3987 %} 3988 ins_pipe( fpu_reg_reg ); 3989 %} 3990 3991 // ====================GENERIC REPLICATE========================================== 3992 3993 // Replicate byte scalar to be vector 3994 instruct Repl4B(vecS dst, rRegI src) %{ 3995 predicate(n->as_Vector()->length() == 4); 3996 match(Set dst (ReplicateB src)); 3997 format %{ "movd $dst,$src\n\t" 3998 "punpcklbw $dst,$dst\n\t" 3999 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 4000 ins_encode %{ 4001 __ movdl($dst$$XMMRegister, $src$$Register); 4002 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4003 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4004 %} 4005 ins_pipe( pipe_slow ); 4006 %} 4007 4008 instruct Repl8B(vecD dst, rRegI src) %{ 4009 predicate(n->as_Vector()->length() == 8); 4010 match(Set dst (ReplicateB src)); 4011 format %{ "movd $dst,$src\n\t" 4012 "punpcklbw $dst,$dst\n\t" 4013 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 4014 ins_encode %{ 4015 __ movdl($dst$$XMMRegister, $src$$Register); 4016 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4017 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4018 %} 4019 ins_pipe( pipe_slow ); 4020 %} 4021 4022 // Replicate byte scalar immediate to be vector by loading from const table. 4023 instruct Repl4B_imm(vecS dst, immI con) %{ 4024 predicate(n->as_Vector()->length() == 4); 4025 match(Set dst (ReplicateB con)); 4026 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 4027 ins_encode %{ 4028 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 4029 %} 4030 ins_pipe( pipe_slow ); 4031 %} 4032 4033 instruct Repl8B_imm(vecD dst, immI con) %{ 4034 predicate(n->as_Vector()->length() == 8); 4035 match(Set dst (ReplicateB con)); 4036 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 4037 ins_encode %{ 4038 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4039 %} 4040 ins_pipe( pipe_slow ); 4041 %} 4042 4043 // Replicate byte scalar zero to be vector 4044 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 4045 predicate(n->as_Vector()->length() == 4); 4046 match(Set dst (ReplicateB zero)); 4047 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 4048 ins_encode %{ 4049 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4050 %} 4051 ins_pipe( fpu_reg_reg ); 4052 %} 4053 4054 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 4055 predicate(n->as_Vector()->length() == 8); 4056 match(Set dst (ReplicateB zero)); 4057 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 4058 ins_encode %{ 4059 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4060 %} 4061 ins_pipe( fpu_reg_reg ); 4062 %} 4063 4064 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 4065 predicate(n->as_Vector()->length() == 16); 4066 match(Set dst (ReplicateB zero)); 4067 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 4068 ins_encode %{ 4069 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4070 %} 4071 ins_pipe( fpu_reg_reg ); 4072 %} 4073 4074 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 4075 predicate(n->as_Vector()->length() == 32); 4076 match(Set dst (ReplicateB zero)); 4077 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 4078 ins_encode %{ 4079 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4080 int vector_len = 1; 4081 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4082 %} 4083 ins_pipe( fpu_reg_reg ); 4084 %} 4085 4086 // Replicate char/short (2 byte) scalar to be vector 4087 instruct Repl2S(vecS dst, rRegI src) %{ 4088 predicate(n->as_Vector()->length() == 2); 4089 match(Set dst (ReplicateS src)); 4090 format %{ "movd $dst,$src\n\t" 4091 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 4092 ins_encode %{ 4093 __ movdl($dst$$XMMRegister, $src$$Register); 4094 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4095 %} 4096 ins_pipe( fpu_reg_reg ); 4097 %} 4098 4099 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 
4100 instruct Repl2S_imm(vecS dst, immI con) %{ 4101 predicate(n->as_Vector()->length() == 2); 4102 match(Set dst (ReplicateS con)); 4103 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 4104 ins_encode %{ 4105 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 4106 %} 4107 ins_pipe( fpu_reg_reg ); 4108 %} 4109 4110 instruct Repl4S_imm(vecD dst, immI con) %{ 4111 predicate(n->as_Vector()->length() == 4); 4112 match(Set dst (ReplicateS con)); 4113 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 4114 ins_encode %{ 4115 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4116 %} 4117 ins_pipe( fpu_reg_reg ); 4118 %} 4119 4120 // Replicate char/short (2 byte) scalar zero to be vector 4121 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 4122 predicate(n->as_Vector()->length() == 2); 4123 match(Set dst (ReplicateS zero)); 4124 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 4125 ins_encode %{ 4126 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4127 %} 4128 ins_pipe( fpu_reg_reg ); 4129 %} 4130 4131 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 4132 predicate(n->as_Vector()->length() == 4); 4133 match(Set dst (ReplicateS zero)); 4134 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 4135 ins_encode %{ 4136 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4137 %} 4138 ins_pipe( fpu_reg_reg ); 4139 %} 4140 4141 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 4142 predicate(n->as_Vector()->length() == 8); 4143 match(Set dst (ReplicateS zero)); 4144 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 4145 ins_encode %{ 4146 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4147 %} 4148 ins_pipe( fpu_reg_reg ); 4149 %} 4150 4151 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 4152 predicate(n->as_Vector()->length() == 16); 4153 match(Set dst (ReplicateS zero)); 4154 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 4155 ins_encode %{ 4156 // 256-bit vpxor is an AVX2 instruction; 32-byte vectors are only used when AVX2 is available. 4157 int vector_len = 1; 4158 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4159 %} 4160 ins_pipe( fpu_reg_reg ); 4161 %} 4162 4163 // Replicate integer (4 byte) scalar to be vector 4164 instruct Repl2I(vecD dst, rRegI src) %{ 4165 predicate(n->as_Vector()->length() == 2); 4166 match(Set dst (ReplicateI src)); 4167 format %{ "movd $dst,$src\n\t" 4168 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4169 ins_encode %{ 4170 __ movdl($dst$$XMMRegister, $src$$Register); 4171 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4172 %} 4173 ins_pipe( fpu_reg_reg ); 4174 %} 4175 4176 // An int can be loaded into an XMM register directly from memory. 4177 instruct Repl2I_mem(vecD dst, memory mem) %{ 4178 predicate(n->as_Vector()->length() == 2); 4179 match(Set dst (ReplicateI (LoadI mem))); 4180 format %{ "movd $dst,$mem\n\t" 4181 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4182 ins_encode %{ 4183 __ movdl($dst$$XMMRegister, $mem$$Address); 4184 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4185 %} 4186 ins_pipe( fpu_reg_reg ); 4187 %} 4188 4189 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 4190 instruct Repl2I_imm(vecD dst, immI con) %{ 4191 predicate(n->as_Vector()->length() == 2); 4192 match(Set dst (ReplicateI con)); 4193 format %{ "movq $dst,[$constantaddress]\t!
replicate2I($con)" %} 4194 ins_encode %{ 4195 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4196 %} 4197 ins_pipe( fpu_reg_reg ); 4198 %} 4199 4200 // Replicate integer (4 byte) scalar zero to be vector 4201 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 4202 predicate(n->as_Vector()->length() == 2); 4203 match(Set dst (ReplicateI zero)); 4204 format %{ "pxor $dst,$dst\t! replicate2I zero" %} 4205 ins_encode %{ 4206 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4207 %} 4208 ins_pipe( fpu_reg_reg ); 4209 %} 4210 4211 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 4212 predicate(n->as_Vector()->length() == 4); 4213 match(Set dst (ReplicateI zero)); 4214 format %{ "pxor $dst,$dst\t! replicate4I zero" %} 4215 ins_encode %{ 4216 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4217 %} 4218 ins_pipe( fpu_reg_reg ); 4219 %} 4220 4221 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 4222 predicate(n->as_Vector()->length() == 8); 4223 match(Set dst (ReplicateI zero)); 4224 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 4225 ins_encode %{ 4226 // 256-bit vpxor is an AVX2 instruction; 32-byte vectors are only used when AVX2 is available. 4227 int vector_len = 1; 4228 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4229 %} 4230 ins_pipe( fpu_reg_reg ); 4231 %} 4232 4233 // Replicate long (8 byte) scalar to be vector 4234 #ifdef _LP64 4235 instruct Repl2L(vecX dst, rRegL src) %{ 4236 predicate(n->as_Vector()->length() == 2); 4237 match(Set dst (ReplicateL src)); 4238 format %{ "movdq $dst,$src\n\t" 4239 "punpcklqdq $dst,$dst\t! replicate2L" %} 4240 ins_encode %{ 4241 __ movdq($dst$$XMMRegister, $src$$Register); 4242 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4243 %} 4244 ins_pipe( pipe_slow ); 4245 %} 4246 #else // _LP64 4247 instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{ 4248 predicate(n->as_Vector()->length() == 2); 4249 match(Set dst (ReplicateL src)); 4250 effect(TEMP dst, USE src, TEMP tmp); 4251 format %{ "movdl $dst,$src.lo\n\t" 4252 "movdl $tmp,$src.hi\n\t" 4253 "punpckldq $dst,$tmp\n\t" 4254 "punpcklqdq $dst,$dst\t! replicate2L" %} 4255 ins_encode %{ 4256 __ movdl($dst$$XMMRegister, $src$$Register); 4257 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4258 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4259 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4260 %} 4261 ins_pipe( pipe_slow ); 4262 %} 4263 #endif // _LP64 4264 4265 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4266 instruct Repl2L_imm(vecX dst, immL con) %{ 4267 predicate(n->as_Vector()->length() == 2); 4268 match(Set dst (ReplicateL con)); 4269 format %{ "movq $dst,[$constantaddress]\n\t" 4270 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 4271 ins_encode %{ 4272 __ movq($dst$$XMMRegister, $constantaddress($con)); 4273 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4274 %} 4275 ins_pipe( pipe_slow ); 4276 %} 4277 4278 // Replicate long (8 byte) scalar zero to be vector 4279 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 4280 predicate(n->as_Vector()->length() == 2); 4281 match(Set dst (ReplicateL zero)); 4282 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 4283 ins_encode %{ 4284 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4285 %} 4286 ins_pipe( fpu_reg_reg ); 4287 %} 4288
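// XORing a register with itself is the canonical x86 zeroing idiom used by
// the *_zero forms above and below: it needs no constant-table load, and the
// hardware recognizes it as having no input dependency. Roughly (illustrative):
//
//   pxor  xmm0, xmm0         // 128-bit: xmm0 = 0
//   vpxor ymm0, ymm0, ymm0   // 256-bit: ymm0 = 0 (also clears the upper lanes)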
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================
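// Quick reference for the vector_len argument used throughout the EVEX patterns
// below (mirroring the assembler's AVX_128bit/AVX_256bit/AVX_512bit constants):
//   vector_len == 0  ->  128-bit (XMM)
//   vector_len == 1  ->  256-bit (YMM)
//   vector_len == 2  ->  512-bit (ZMM)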
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
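// Note: the UseAVX > 2 predicate on the *_zero_evex patterns guarantees EVEX
// encodings are available, so the vpxor macro above can emit the real 512-bit
// form; xor-ing a register with itself is the usual dependency-breaking idiom
// for zeroing it.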
instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
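// Scalar model of the ReplicateI broadcast patterns above (illustrative only):
//   for (int i = 0; i < vlen; i++) dst[i] = src;
// evpbroadcastd covers the register-source form and vpbroadcastd the memory
// form, letting the matcher fold a LoadI straight into the broadcast.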
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "evpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "evpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
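// The Repl*L_mem_evex forms above match (ReplicateL (LoadL mem)), folding the
// load into the broadcast; roughly:
//   jlong t = *mem; for (int i = 0; i < vlen; i++) dst[i] = t;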
instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
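// Why vpxor rather than vxorps in the zero patterns above: the 512-bit vxorps
// encoding sits behind AVX512DQ, while a 512-bit integer vpxor needs only
// AVX512F, and an all-zero bit pattern means the same thing for every lane type.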
instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================
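// Shape shared by the integer add reductions below (illustrative sketch):
//   int dst = src1;                          // scalar seed
//   for (int i = 0; i < vlen; i++) dst += src2[i];
// The vector is folded lane-pair by lane-pair (shuffle/extract + add) down to
// one lane, and the scalar seed is added in before the final movd to $dst.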
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextracti128_high $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
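// Note on the floating-point reductions below: unlike the integer cases they
// accumulate strictly lane by lane with scalar addss/addsd (and their AVX
// forms), preserving the evaluation order of a sequential loop; reassociating
// float or double adds into a tree could change the rounded result.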
instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "addss $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
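// The multiply reductions below use the same folding shape with pmulld/vpmulld
// (and vpmullq for longs). Note the stricter predicates: packed 32-bit multiply
// (pmulld) is an SSE4.1 instruction, hence UseSSE > 3, and the vpmullq forms
// require AVX512DQ.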
mul reduction2I" %} 5599 ins_encode %{ 5600 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5601 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5602 __ movdl($tmp$$XMMRegister, $src1$$Register); 5603 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5604 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5605 %} 5606 ins_pipe( pipe_slow ); 5607 %} 5608 5609 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5610 predicate(UseAVX > 0); 5611 match(Set dst (MulReductionVI src1 src2)); 5612 effect(TEMP tmp, TEMP tmp2); 5613 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5614 "vpmulld $tmp,$src2,$tmp2\n\t" 5615 "movd $tmp2,$src1\n\t" 5616 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5617 "movd $dst,$tmp2\t! mul reduction2I" %} 5618 ins_encode %{ 5619 int vector_len = 0; 5620 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5621 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5622 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5623 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5624 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5625 %} 5626 ins_pipe( pipe_slow ); 5627 %} 5628 5629 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5630 predicate(UseSSE > 3 && UseAVX == 0); 5631 match(Set dst (MulReductionVI src1 src2)); 5632 effect(TEMP tmp, TEMP tmp2); 5633 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5634 "pmulld $tmp2,$src2\n\t" 5635 "pshufd $tmp,$tmp2,0x1\n\t" 5636 "pmulld $tmp2,$tmp\n\t" 5637 "movd $tmp,$src1\n\t" 5638 "pmulld $tmp2,$tmp\n\t" 5639 "movd $dst,$tmp2\t! mul reduction4I" %} 5640 ins_encode %{ 5641 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5642 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5643 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5644 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5645 __ movdl($tmp$$XMMRegister, $src1$$Register); 5646 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5647 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5648 %} 5649 ins_pipe( pipe_slow ); 5650 %} 5651 5652 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5653 predicate(UseAVX > 0); 5654 match(Set dst (MulReductionVI src1 src2)); 5655 effect(TEMP tmp, TEMP tmp2); 5656 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5657 "vpmulld $tmp,$src2,$tmp2\n\t" 5658 "pshufd $tmp2,$tmp,0x1\n\t" 5659 "vpmulld $tmp,$tmp,$tmp2\n\t" 5660 "movd $tmp2,$src1\n\t" 5661 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5662 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5663 ins_encode %{ 5664 int vector_len = 0; 5665 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5666 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5667 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5668 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5669 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5670 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5671 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5672 %} 5673 ins_pipe( pipe_slow ); 5674 %} 5675 5676 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5677 predicate(UseAVX > 1); 5678 match(Set dst (MulReductionVI src1 src2)); 5679 effect(TEMP tmp, TEMP tmp2); 5680 format %{ "vextracti128_high $tmp,$src2\n\t" 5681 "vpmulld $tmp,$tmp,$src2\n\t" 5682 "pshufd $tmp2,$tmp,0xE\n\t" 5683 "vpmulld $tmp,$tmp,$tmp2\n\t" 5684 "pshufd $tmp2,$tmp,0x1\n\t" 5685 "vpmulld $tmp,$tmp,$tmp2\n\t" 5686 "movd $tmp2,$src1\n\t" 5687 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5688 "movd $dst,$tmp2\t! mul reduction8I" %} 5689 ins_encode %{ 5690 int vector_len = 0; 5691 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5692 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5693 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5694 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5695 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5696 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5697 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5698 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5699 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5700 %} 5701 ins_pipe( pipe_slow ); 5702 %} 5703 5704 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5705 predicate(UseAVX > 2); 5706 match(Set dst (MulReductionVI src1 src2)); 5707 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5708 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5709 "vpmulld $tmp3,$tmp3,$src2\n\t" 5710 "vextracti128_high $tmp,$tmp3\n\t" 5711 "vpmulld $tmp,$tmp,$src2\n\t" 5712 "pshufd $tmp2,$tmp,0xE\n\t" 5713 "vpmulld $tmp,$tmp,$tmp2\n\t" 5714 "pshufd $tmp2,$tmp,0x1\n\t" 5715 "vpmulld $tmp,$tmp,$tmp2\n\t" 5716 "movd $tmp2,$src1\n\t" 5717 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5718 "movd $dst,$tmp2\t! 
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 1);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
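
// The long multiply reductions below use vpmullq, an AVX-512DQ
// instruction, hence the supports_avx512dq() predicates.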
#ifdef _LP64
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmullq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpmullq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
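
// The float and double multiply reductions accumulate strictly lane by
// lane into $dst: floating-point multiplication is not associative, so
// the lanes are not folded pairwise the way the integer reductions
// above are.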
instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
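
// In the encodings below, vector_len selects the operand width of the
// AVX/EVEX instruction: 0 for 128-bit (XMM), 1 for 256-bit (YMM) and
// 2 for 512-bit (ZMM) vectors. The _mem variants fold the LoadVector
// directly into the instruction's memory operand instead of going
// through a separate register load.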

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
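
// 512-bit byte and short additions also require AVX512BW, since
// vpaddb/vpaddw on ZMM registers are AVX-512BW instructions.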
instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
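
// The 256-bit integer forms require AVX2 (UseAVX > 1): AVX1 only
// provides 128-bit integer vector instructions.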
instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
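// Unlike the integer forms above, the 256-bit packed FP forms only
// need AVX1 (UseAVX > 0): vaddps/vaddpd on YMM registers are part of
// the original AVX extension.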
instruct vadd2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
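
// The subtractions below mirror the addition patterns above, using
// psubb/psubw/psubd/psubq and subps/subpd (or their VEX forms) under
// the same predicates and operand shapes.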

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
sub packed4F" %} 7261 ins_encode %{ 7262 int vector_len = 0; 7263 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7264 %} 7265 ins_pipe( pipe_slow ); 7266 %} 7267 7268 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7269 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7270 match(Set dst (SubVF src (LoadVector mem))); 7271 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7272 ins_encode %{ 7273 int vector_len = 0; 7274 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7275 %} 7276 ins_pipe( pipe_slow ); 7277 %} 7278 7279 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7280 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7281 match(Set dst (SubVF src1 src2)); 7282 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7283 ins_encode %{ 7284 int vector_len = 1; 7285 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7286 %} 7287 ins_pipe( pipe_slow ); 7288 %} 7289 7290 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7291 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7292 match(Set dst (SubVF src (LoadVector mem))); 7293 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 7294 ins_encode %{ 7295 int vector_len = 1; 7296 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7297 %} 7298 ins_pipe( pipe_slow ); 7299 %} 7300 7301 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7302 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7303 match(Set dst (SubVF src1 src2)); 7304 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 7305 ins_encode %{ 7306 int vector_len = 2; 7307 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7308 %} 7309 ins_pipe( pipe_slow ); 7310 %} 7311 7312 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 7313 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7314 match(Set dst (SubVF src (LoadVector mem))); 7315 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 7316 ins_encode %{ 7317 int vector_len = 2; 7318 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7319 %} 7320 ins_pipe( pipe_slow ); 7321 %} 7322 7323 // Doubles vector sub 7324 instruct vsub2D(vecX dst, vecX src) %{ 7325 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7326 match(Set dst (SubVD dst src)); 7327 format %{ "subpd $dst,$src\t! sub packed2D" %} 7328 ins_encode %{ 7329 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 7330 %} 7331 ins_pipe( pipe_slow ); 7332 %} 7333 7334 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 7335 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7336 match(Set dst (SubVD src1 src2)); 7337 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 7338 ins_encode %{ 7339 int vector_len = 0; 7340 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7341 %} 7342 ins_pipe( pipe_slow ); 7343 %} 7344 7345 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 7346 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7347 match(Set dst (SubVD src (LoadVector mem))); 7348 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed2D" %} 7349 ins_encode %{ 7350 int vector_len = 0; 7351 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7352 %} 7353 ins_pipe( pipe_slow ); 7354 %} 7355 7356 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7357 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7358 match(Set dst (SubVD src1 src2)); 7359 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7360 ins_encode %{ 7361 int vector_len = 1; 7362 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7363 %} 7364 ins_pipe( pipe_slow ); 7365 %} 7366 7367 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7368 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7369 match(Set dst (SubVD src (LoadVector mem))); 7370 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7371 ins_encode %{ 7372 int vector_len = 1; 7373 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7374 %} 7375 ins_pipe( pipe_slow ); 7376 %} 7377 7378 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7379 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7380 match(Set dst (SubVD src1 src2)); 7381 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7382 ins_encode %{ 7383 int vector_len = 2; 7384 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7385 %} 7386 ins_pipe( pipe_slow ); 7387 %} 7388 7389 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7390 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7391 match(Set dst (SubVD src (LoadVector mem))); 7392 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} 7393 ins_encode %{ 7394 int vector_len = 2; 7395 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7396 %} 7397 ins_pipe( pipe_slow ); 7398 %} 7399 7400 // --------------------------------- MUL -------------------------------------- 7401 7402 // Shorts/Chars vector mul 7403 instruct vmul2S(vecS dst, vecS src) %{ 7404 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7405 match(Set dst (MulVS dst src)); 7406 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7407 ins_encode %{ 7408 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7409 %} 7410 ins_pipe( pipe_slow ); 7411 %} 7412 7413 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 7414 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7415 match(Set dst (MulVS src1 src2)); 7416 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7417 ins_encode %{ 7418 int vector_len = 0; 7419 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7420 %} 7421 ins_pipe( pipe_slow ); 7422 %} 7423 7424 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ 7425 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7426 match(Set dst (MulVS src (LoadVector mem))); 7427 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7428 ins_encode %{ 7429 int vector_len = 0; 7430 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7431 %} 7432 ins_pipe( pipe_slow ); 7433 %} 7434 7435 instruct vmul4S(vecD dst, vecD src) %{ 7436 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7437 match(Set dst (MulVS dst src)); 7438 format %{ "pmullw $dst,$src\t! 
mul packed4S" %} 7439 ins_encode %{ 7440 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7441 %} 7442 ins_pipe( pipe_slow ); 7443 %} 7444 7445 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 7446 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7447 match(Set dst (MulVS src1 src2)); 7448 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7449 ins_encode %{ 7450 int vector_len = 0; 7451 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7452 %} 7453 ins_pipe( pipe_slow ); 7454 %} 7455 7456 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 7457 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7458 match(Set dst (MulVS src (LoadVector mem))); 7459 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7460 ins_encode %{ 7461 int vector_len = 0; 7462 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7463 %} 7464 ins_pipe( pipe_slow ); 7465 %} 7466 7467 instruct vmul8S(vecX dst, vecX src) %{ 7468 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7469 match(Set dst (MulVS dst src)); 7470 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7471 ins_encode %{ 7472 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7473 %} 7474 ins_pipe( pipe_slow ); 7475 %} 7476 7477 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 7478 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7479 match(Set dst (MulVS src1 src2)); 7480 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7481 ins_encode %{ 7482 int vector_len = 0; 7483 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7484 %} 7485 ins_pipe( pipe_slow ); 7486 %} 7487 7488 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 7489 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7490 match(Set dst (MulVS src (LoadVector mem))); 7491 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7492 ins_encode %{ 7493 int vector_len = 0; 7494 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7495 %} 7496 ins_pipe( pipe_slow ); 7497 %} 7498 7499 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 7500 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7501 match(Set dst (MulVS src1 src2)); 7502 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7503 ins_encode %{ 7504 int vector_len = 1; 7505 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7506 %} 7507 ins_pipe( pipe_slow ); 7508 %} 7509 7510 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 7511 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7512 match(Set dst (MulVS src (LoadVector mem))); 7513 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7514 ins_encode %{ 7515 int vector_len = 1; 7516 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7517 %} 7518 ins_pipe( pipe_slow ); 7519 %} 7520 7521 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7522 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7523 match(Set dst (MulVS src1 src2)); 7524 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed32S" %} 7525 ins_encode %{ 7526 int vector_len = 2; 7527 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7528 %} 7529 ins_pipe( pipe_slow ); 7530 %} 7531 7532 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7533 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7534 match(Set dst (MulVS src (LoadVector mem))); 7535 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7536 ins_encode %{ 7537 int vector_len = 2; 7538 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7539 %} 7540 ins_pipe( pipe_slow ); 7541 %} 7542 7543 // Integers vector mul (sse4_1) 7544 instruct vmul2I(vecD dst, vecD src) %{ 7545 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7546 match(Set dst (MulVI dst src)); 7547 format %{ "pmulld $dst,$src\t! mul packed2I" %} 7548 ins_encode %{ 7549 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7550 %} 7551 ins_pipe( pipe_slow ); 7552 %} 7553 7554 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 7555 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7556 match(Set dst (MulVI src1 src2)); 7557 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 7558 ins_encode %{ 7559 int vector_len = 0; 7560 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7561 %} 7562 ins_pipe( pipe_slow ); 7563 %} 7564 7565 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 7566 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7567 match(Set dst (MulVI src (LoadVector mem))); 7568 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 7569 ins_encode %{ 7570 int vector_len = 0; 7571 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7572 %} 7573 ins_pipe( pipe_slow ); 7574 %} 7575 7576 instruct vmul4I(vecX dst, vecX src) %{ 7577 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7578 match(Set dst (MulVI dst src)); 7579 format %{ "pmulld $dst,$src\t! mul packed4I" %} 7580 ins_encode %{ 7581 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7582 %} 7583 ins_pipe( pipe_slow ); 7584 %} 7585 7586 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 7587 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7588 match(Set dst (MulVI src1 src2)); 7589 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 7590 ins_encode %{ 7591 int vector_len = 0; 7592 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7593 %} 7594 ins_pipe( pipe_slow ); 7595 %} 7596 7597 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 7598 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7599 match(Set dst (MulVI src (LoadVector mem))); 7600 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 7601 ins_encode %{ 7602 int vector_len = 0; 7603 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7604 %} 7605 ins_pipe( pipe_slow ); 7606 %} 7607 7608 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 7609 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7610 match(Set dst (MulVL src1 src2)); 7611 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 7612 ins_encode %{ 7613 int vector_len = 0; 7614 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7615 %} 7616 ins_pipe( pipe_slow ); 7617 %} 7618 7619 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 7620 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7621 match(Set dst (MulVL src (LoadVector mem))); 7622 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 7623 ins_encode %{ 7624 int vector_len = 0; 7625 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7626 %} 7627 ins_pipe( pipe_slow ); 7628 %} 7629 7630 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 7631 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7632 match(Set dst (MulVL src1 src2)); 7633 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 7634 ins_encode %{ 7635 int vector_len = 1; 7636 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7637 %} 7638 ins_pipe( pipe_slow ); 7639 %} 7640 7641 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7642 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7643 match(Set dst (MulVL src (LoadVector mem))); 7644 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7645 ins_encode %{ 7646 int vector_len = 1; 7647 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7648 %} 7649 ins_pipe( pipe_slow ); 7650 %} 7651 7652 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7653 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7654 match(Set dst (MulVL src1 src2)); 7655 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 7656 ins_encode %{ 7657 int vector_len = 2; 7658 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7659 %} 7660 ins_pipe( pipe_slow ); 7661 %} 7662 7663 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7664 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7665 match(Set dst (MulVL src (LoadVector mem))); 7666 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7667 ins_encode %{ 7668 int vector_len = 2; 7669 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7670 %} 7671 ins_pipe( pipe_slow ); 7672 %} 7673 7674 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7675 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7676 match(Set dst (MulVI src1 src2)); 7677 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7678 ins_encode %{ 7679 int vector_len = 1; 7680 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7681 %} 7682 ins_pipe( pipe_slow ); 7683 %} 7684 7685 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7686 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7687 match(Set dst (MulVI src (LoadVector mem))); 7688 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7689 ins_encode %{ 7690 int vector_len = 1; 7691 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7692 %} 7693 ins_pipe( pipe_slow ); 7694 %} 7695 7696 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7697 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7698 match(Set dst (MulVI src1 src2)); 7699 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 7700 ins_encode %{ 7701 int vector_len = 2; 7702 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7703 %} 7704 ins_pipe( pipe_slow ); 7705 %} 7706 7707 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7708 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7709 match(Set dst (MulVI src (LoadVector mem))); 7710 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7711 ins_encode %{ 7712 int vector_len = 2; 7713 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7714 %} 7715 ins_pipe( pipe_slow ); 7716 %} 7717 7718 // Floats vector mul 7719 instruct vmul2F(vecD dst, vecD src) %{ 7720 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7721 match(Set dst (MulVF dst src)); 7722 format %{ "mulps $dst,$src\t! mul packed2F" %} 7723 ins_encode %{ 7724 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7725 %} 7726 ins_pipe( pipe_slow ); 7727 %} 7728 7729 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7730 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7731 match(Set dst (MulVF src1 src2)); 7732 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7733 ins_encode %{ 7734 int vector_len = 0; 7735 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7736 %} 7737 ins_pipe( pipe_slow ); 7738 %} 7739 7740 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7741 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7742 match(Set dst (MulVF src (LoadVector mem))); 7743 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7744 ins_encode %{ 7745 int vector_len = 0; 7746 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7747 %} 7748 ins_pipe( pipe_slow ); 7749 %} 7750 7751 instruct vmul4F(vecX dst, vecX src) %{ 7752 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7753 match(Set dst (MulVF dst src)); 7754 format %{ "mulps $dst,$src\t! mul packed4F" %} 7755 ins_encode %{ 7756 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7757 %} 7758 ins_pipe( pipe_slow ); 7759 %} 7760 7761 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7762 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7763 match(Set dst (MulVF src1 src2)); 7764 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7765 ins_encode %{ 7766 int vector_len = 0; 7767 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7768 %} 7769 ins_pipe( pipe_slow ); 7770 %} 7771 7772 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 7773 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7774 match(Set dst (MulVF src (LoadVector mem))); 7775 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 7776 ins_encode %{ 7777 int vector_len = 0; 7778 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7779 %} 7780 ins_pipe( pipe_slow ); 7781 %} 7782 7783 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 7784 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7785 match(Set dst (MulVF src1 src2)); 7786 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 7787 ins_encode %{ 7788 int vector_len = 1; 7789 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7790 %} 7791 ins_pipe( pipe_slow ); 7792 %} 7793 7794 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 7795 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7796 match(Set dst (MulVF src (LoadVector mem))); 7797 format %{ "vmulps $dst,$src,$mem\t! 
mul packed8F" %} 7798 ins_encode %{ 7799 int vector_len = 1; 7800 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7801 %} 7802 ins_pipe( pipe_slow ); 7803 %} 7804 7805 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7806 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7807 match(Set dst (MulVF src1 src2)); 7808 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 7809 ins_encode %{ 7810 int vector_len = 2; 7811 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7812 %} 7813 ins_pipe( pipe_slow ); 7814 %} 7815 7816 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 7817 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7818 match(Set dst (MulVF src (LoadVector mem))); 7819 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 7820 ins_encode %{ 7821 int vector_len = 2; 7822 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7823 %} 7824 ins_pipe( pipe_slow ); 7825 %} 7826 7827 // Doubles vector mul 7828 instruct vmul2D(vecX dst, vecX src) %{ 7829 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7830 match(Set dst (MulVD dst src)); 7831 format %{ "mulpd $dst,$src\t! mul packed2D" %} 7832 ins_encode %{ 7833 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 7834 %} 7835 ins_pipe( pipe_slow ); 7836 %} 7837 7838 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 7839 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7840 match(Set dst (MulVD src1 src2)); 7841 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 7842 ins_encode %{ 7843 int vector_len = 0; 7844 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7845 %} 7846 ins_pipe( pipe_slow ); 7847 %} 7848 7849 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 7850 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7851 match(Set dst (MulVD src (LoadVector mem))); 7852 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 7853 ins_encode %{ 7854 int vector_len = 0; 7855 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7856 %} 7857 ins_pipe( pipe_slow ); 7858 %} 7859 7860 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 7861 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7862 match(Set dst (MulVD src1 src2)); 7863 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 7864 ins_encode %{ 7865 int vector_len = 1; 7866 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7867 %} 7868 ins_pipe( pipe_slow ); 7869 %} 7870 7871 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 7872 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7873 match(Set dst (MulVD src (LoadVector mem))); 7874 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 7875 ins_encode %{ 7876 int vector_len = 1; 7877 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7878 %} 7879 ins_pipe( pipe_slow ); 7880 %} 7881 7882 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7883 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7884 match(Set dst (MulVD src1 src2)); 7885 format %{ "vmulpd $dst k0,$src1,$src2\t! 
mul packed8D" %} 7886 ins_encode %{ 7887 int vector_len = 2; 7888 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7889 %} 7890 ins_pipe( pipe_slow ); 7891 %} 7892 7893 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 7894 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7895 match(Set dst (MulVD src (LoadVector mem))); 7896 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 7897 ins_encode %{ 7898 int vector_len = 2; 7899 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7900 %} 7901 ins_pipe( pipe_slow ); 7902 %} 7903 7904 instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 7905 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7906 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 7907 effect(TEMP dst, USE src1, USE src2); 7908 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 7909 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 7910 %} 7911 ins_encode %{ 7912 int vector_len = 1; 7913 int cond = (Assembler::Condition)($copnd$$cmpcode); 7914 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 7915 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 7916 %} 7917 ins_pipe( pipe_slow ); 7918 %} 7919 7920 instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 7921 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7922 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 7923 effect(TEMP dst, USE src1, USE src2); 7924 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 7925 "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 7926 %} 7927 ins_encode %{ 7928 int vector_len = 1; 7929 int cond = (Assembler::Condition)($copnd$$cmpcode); 7930 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 7931 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 7932 %} 7933 ins_pipe( pipe_slow ); 7934 %} 7935 7936 // --------------------------------- DIV -------------------------------------- 7937 7938 // Floats vector div 7939 instruct vdiv2F(vecD dst, vecD src) %{ 7940 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7941 match(Set dst (DivVF dst src)); 7942 format %{ "divps $dst,$src\t! div packed2F" %} 7943 ins_encode %{ 7944 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7945 %} 7946 ins_pipe( pipe_slow ); 7947 %} 7948 7949 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 7950 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7951 match(Set dst (DivVF src1 src2)); 7952 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 7953 ins_encode %{ 7954 int vector_len = 0; 7955 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7956 %} 7957 ins_pipe( pipe_slow ); 7958 %} 7959 7960 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 7961 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7962 match(Set dst (DivVF src (LoadVector mem))); 7963 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 7964 ins_encode %{ 7965 int vector_len = 0; 7966 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7967 %} 7968 ins_pipe( pipe_slow ); 7969 %} 7970 7971 instruct vdiv4F(vecX dst, vecX src) %{ 7972 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7973 match(Set dst (DivVF dst src)); 7974 format %{ "divps $dst,$src\t! 
div packed4F" %} 7975 ins_encode %{ 7976 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7977 %} 7978 ins_pipe( pipe_slow ); 7979 %} 7980 7981 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 7982 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7983 match(Set dst (DivVF src1 src2)); 7984 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 7985 ins_encode %{ 7986 int vector_len = 0; 7987 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7988 %} 7989 ins_pipe( pipe_slow ); 7990 %} 7991 7992 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 7993 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7994 match(Set dst (DivVF src (LoadVector mem))); 7995 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 7996 ins_encode %{ 7997 int vector_len = 0; 7998 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7999 %} 8000 ins_pipe( pipe_slow ); 8001 %} 8002 8003 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 8004 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8005 match(Set dst (DivVF src1 src2)); 8006 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 8007 ins_encode %{ 8008 int vector_len = 1; 8009 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8010 %} 8011 ins_pipe( pipe_slow ); 8012 %} 8013 8014 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 8015 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8016 match(Set dst (DivVF src (LoadVector mem))); 8017 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 8018 ins_encode %{ 8019 int vector_len = 1; 8020 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8021 %} 8022 ins_pipe( pipe_slow ); 8023 %} 8024 8025 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8026 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8027 match(Set dst (DivVF src1 src2)); 8028 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 8029 ins_encode %{ 8030 int vector_len = 2; 8031 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8032 %} 8033 ins_pipe( pipe_slow ); 8034 %} 8035 8036 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 8037 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8038 match(Set dst (DivVF src (LoadVector mem))); 8039 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 8040 ins_encode %{ 8041 int vector_len = 2; 8042 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8043 %} 8044 ins_pipe( pipe_slow ); 8045 %} 8046 8047 // Doubles vector div 8048 instruct vdiv2D(vecX dst, vecX src) %{ 8049 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8050 match(Set dst (DivVD dst src)); 8051 format %{ "divpd $dst,$src\t! div packed2D" %} 8052 ins_encode %{ 8053 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 8054 %} 8055 ins_pipe( pipe_slow ); 8056 %} 8057 8058 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 8059 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8060 match(Set dst (DivVD src1 src2)); 8061 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 8062 ins_encode %{ 8063 int vector_len = 0; 8064 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8065 %} 8066 ins_pipe( pipe_slow ); 8067 %} 8068 8069 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 8070 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8071 match(Set dst (DivVD src (LoadVector mem))); 8072 format %{ "vdivpd $dst,$src,$mem\t! 
div packed2D" %} 8073 ins_encode %{ 8074 int vector_len = 0; 8075 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8076 %} 8077 ins_pipe( pipe_slow ); 8078 %} 8079 8080 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 8081 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8082 match(Set dst (DivVD src1 src2)); 8083 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 8084 ins_encode %{ 8085 int vector_len = 1; 8086 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8087 %} 8088 ins_pipe( pipe_slow ); 8089 %} 8090 8091 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8092 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8093 match(Set dst (DivVD src (LoadVector mem))); 8094 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8095 ins_encode %{ 8096 int vector_len = 1; 8097 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8098 %} 8099 ins_pipe( pipe_slow ); 8100 %} 8101 8102 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8103 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8104 match(Set dst (DivVD src1 src2)); 8105 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8106 ins_encode %{ 8107 int vector_len = 2; 8108 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8109 %} 8110 ins_pipe( pipe_slow ); 8111 %} 8112 8113 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8114 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8115 match(Set dst (DivVD src (LoadVector mem))); 8116 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8117 ins_encode %{ 8118 int vector_len = 2; 8119 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8120 %} 8121 ins_pipe( pipe_slow ); 8122 %} 8123 8124 // ------------------------------ Shift --------------------------------------- 8125 8126 // Left and right shift count vectors are the same on x86 8127 // (only lowest bits of xmm reg are used for count). 8128 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8129 match(Set dst (LShiftCntV cnt)); 8130 match(Set dst (RShiftCntV cnt)); 8131 format %{ "movd $dst,$cnt\t! load shift count" %} 8132 ins_encode %{ 8133 __ movdl($dst$$XMMRegister, $cnt$$Register); 8134 %} 8135 ins_pipe( pipe_slow ); 8136 %} 8137 8138 // --------------------------------- Sqrt -------------------------------------- 8139 8140 // Floating point vector sqrt 8141 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8142 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8143 match(Set dst (SqrtVD src)); 8144 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 8145 ins_encode %{ 8146 int vector_len = 0; 8147 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8148 %} 8149 ins_pipe( pipe_slow ); 8150 %} 8151 8152 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8153 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8154 match(Set dst (SqrtVD (LoadVector mem))); 8155 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8156 ins_encode %{ 8157 int vector_len = 0; 8158 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8159 %} 8160 ins_pipe( pipe_slow ); 8161 %} 8162 8163 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8164 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8165 match(Set dst (SqrtVD src)); 8166 format %{ "vsqrtpd $dst,$src\t! 
sqrt packed4D" %} 8167 ins_encode %{ 8168 int vector_len = 1; 8169 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8170 %} 8171 ins_pipe( pipe_slow ); 8172 %} 8173 8174 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8175 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8176 match(Set dst (SqrtVD (LoadVector mem))); 8177 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8178 ins_encode %{ 8179 int vector_len = 1; 8180 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8181 %} 8182 ins_pipe( pipe_slow ); 8183 %} 8184 8185 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8186 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8187 match(Set dst (SqrtVD src)); 8188 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 8189 ins_encode %{ 8190 int vector_len = 2; 8191 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8192 %} 8193 ins_pipe( pipe_slow ); 8194 %} 8195 8196 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8197 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8198 match(Set dst (SqrtVD (LoadVector mem))); 8199 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 8200 ins_encode %{ 8201 int vector_len = 2; 8202 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8203 %} 8204 ins_pipe( pipe_slow ); 8205 %} 8206 8207 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 8208 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8209 match(Set dst (SqrtVF src)); 8210 format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %} 8211 ins_encode %{ 8212 int vector_len = 0; 8213 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8214 %} 8215 ins_pipe( pipe_slow ); 8216 %} 8217 8218 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 8219 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8220 match(Set dst (SqrtVF (LoadVector mem))); 8221 format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} 8222 ins_encode %{ 8223 int vector_len = 0; 8224 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8225 %} 8226 ins_pipe( pipe_slow ); 8227 %} 8228 8229 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 8230 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8231 match(Set dst (SqrtVF src)); 8232 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 8233 ins_encode %{ 8234 int vector_len = 0; 8235 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8236 %} 8237 ins_pipe( pipe_slow ); 8238 %} 8239 8240 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 8241 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8242 match(Set dst (SqrtVF (LoadVector mem))); 8243 format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} 8244 ins_encode %{ 8245 int vector_len = 0; 8246 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8247 %} 8248 ins_pipe( pipe_slow ); 8249 %} 8250 8251 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 8252 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8253 match(Set dst (SqrtVF src)); 8254 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 8255 ins_encode %{ 8256 int vector_len = 1; 8257 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8258 %} 8259 ins_pipe( pipe_slow ); 8260 %} 8261 8262 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 8263 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8264 match(Set dst (SqrtVF (LoadVector mem))); 8265 format %{ "vsqrtps $dst,$mem\t! 
sqrt packed8F" %} 8266 ins_encode %{ 8267 int vector_len = 1; 8268 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8269 %} 8270 ins_pipe( pipe_slow ); 8271 %} 8272 8273 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 8274 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8275 match(Set dst (SqrtVF src)); 8276 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8277 ins_encode %{ 8278 int vector_len = 2; 8279 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8280 %} 8281 ins_pipe( pipe_slow ); 8282 %} 8283 8284 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8285 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8286 match(Set dst (SqrtVF (LoadVector mem))); 8287 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8288 ins_encode %{ 8289 int vector_len = 2; 8290 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8291 %} 8292 ins_pipe( pipe_slow ); 8293 %} 8294 8295 // ------------------------------ LeftShift ----------------------------------- 8296 8297 // Shorts/Chars vector left shift 8298 instruct vsll2S(vecS dst, vecS shift) %{ 8299 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8300 match(Set dst (LShiftVS dst shift)); 8301 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8302 ins_encode %{ 8303 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8304 %} 8305 ins_pipe( pipe_slow ); 8306 %} 8307 8308 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8309 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8310 match(Set dst (LShiftVS dst shift)); 8311 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8312 ins_encode %{ 8313 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8314 %} 8315 ins_pipe( pipe_slow ); 8316 %} 8317 8318 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 8319 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8320 match(Set dst (LShiftVS src shift)); 8321 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8322 ins_encode %{ 8323 int vector_len = 0; 8324 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8325 %} 8326 ins_pipe( pipe_slow ); 8327 %} 8328 8329 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8330 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8331 match(Set dst (LShiftVS src shift)); 8332 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8333 ins_encode %{ 8334 int vector_len = 0; 8335 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8336 %} 8337 ins_pipe( pipe_slow ); 8338 %} 8339 8340 instruct vsll4S(vecD dst, vecS shift) %{ 8341 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8342 match(Set dst (LShiftVS dst shift)); 8343 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8344 ins_encode %{ 8345 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8346 %} 8347 ins_pipe( pipe_slow ); 8348 %} 8349 8350 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8351 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8352 match(Set dst (LShiftVS dst shift)); 8353 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8354 ins_encode %{ 8355 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8356 %} 8357 ins_pipe( pipe_slow ); 8358 %} 8359 8360 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 8361 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8362 match(Set dst (LShiftVS src shift)); 8363 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 8364 ins_encode %{ 8365 int vector_len = 0; 8366 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8367 %} 8368 ins_pipe( pipe_slow ); 8369 %} 8370 8371 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8372 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8373 match(Set dst (LShiftVS src shift)); 8374 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8375 ins_encode %{ 8376 int vector_len = 0; 8377 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8378 %} 8379 ins_pipe( pipe_slow ); 8380 %} 8381 8382 instruct vsll8S(vecX dst, vecS shift) %{ 8383 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8384 match(Set dst (LShiftVS dst shift)); 8385 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8386 ins_encode %{ 8387 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8388 %} 8389 ins_pipe( pipe_slow ); 8390 %} 8391 8392 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 8393 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8394 match(Set dst (LShiftVS dst shift)); 8395 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8396 ins_encode %{ 8397 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8398 %} 8399 ins_pipe( pipe_slow ); 8400 %} 8401 8402 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 8403 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8404 match(Set dst (LShiftVS src shift)); 8405 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8406 ins_encode %{ 8407 int vector_len = 0; 8408 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8409 %} 8410 ins_pipe( pipe_slow ); 8411 %} 8412 8413 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8414 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8415 match(Set dst (LShiftVS src shift)); 8416 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8417 ins_encode %{ 8418 int vector_len = 0; 8419 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8420 %} 8421 ins_pipe( pipe_slow ); 8422 %} 8423 8424 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 8425 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8426 match(Set dst (LShiftVS src shift)); 8427 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8428 ins_encode %{ 8429 int vector_len = 1; 8430 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8431 %} 8432 ins_pipe( pipe_slow ); 8433 %} 8434 8435 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8436 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8437 match(Set dst (LShiftVS src shift)); 8438 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8439 ins_encode %{ 8440 int vector_len = 1; 8441 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8442 %} 8443 ins_pipe( pipe_slow ); 8444 %} 8445 8446 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8447 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8448 match(Set dst (LShiftVS src shift)); 8449 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed32S" %} 8450 ins_encode %{ 8451 int vector_len = 2; 8452 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8453 %} 8454 ins_pipe( pipe_slow ); 8455 %} 8456 8457 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8458 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8459 match(Set dst (LShiftVS src shift)); 8460 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 8461 ins_encode %{ 8462 int vector_len = 2; 8463 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8464 %} 8465 ins_pipe( pipe_slow ); 8466 %} 8467 8468 // Integers vector left shift 8469 instruct vsll2I(vecD dst, vecS shift) %{ 8470 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8471 match(Set dst (LShiftVI dst shift)); 8472 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8473 ins_encode %{ 8474 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8475 %} 8476 ins_pipe( pipe_slow ); 8477 %} 8478 8479 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 8480 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8481 match(Set dst (LShiftVI dst shift)); 8482 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8483 ins_encode %{ 8484 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8485 %} 8486 ins_pipe( pipe_slow ); 8487 %} 8488 8489 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 8490 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8491 match(Set dst (LShiftVI src shift)); 8492 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8493 ins_encode %{ 8494 int vector_len = 0; 8495 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8496 %} 8497 ins_pipe( pipe_slow ); 8498 %} 8499 8500 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8501 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8502 match(Set dst (LShiftVI src shift)); 8503 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8504 ins_encode %{ 8505 int vector_len = 0; 8506 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8507 %} 8508 ins_pipe( pipe_slow ); 8509 %} 8510 8511 instruct vsll4I(vecX dst, vecS shift) %{ 8512 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8513 match(Set dst (LShiftVI dst shift)); 8514 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8515 ins_encode %{ 8516 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8517 %} 8518 ins_pipe( pipe_slow ); 8519 %} 8520 8521 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 8522 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8523 match(Set dst (LShiftVI dst shift)); 8524 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8525 ins_encode %{ 8526 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8527 %} 8528 ins_pipe( pipe_slow ); 8529 %} 8530 8531 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 8532 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8533 match(Set dst (LShiftVI src shift)); 8534 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 8535 ins_encode %{ 8536 int vector_len = 0; 8537 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8538 %} 8539 ins_pipe( pipe_slow ); 8540 %} 8541 8542 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8543 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8544 match(Set dst (LShiftVI src shift)); 8545 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 8546 ins_encode %{ 8547 int vector_len = 0; 8548 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8549 %} 8550 ins_pipe( pipe_slow ); 8551 %} 8552 8553 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 8554 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8555 match(Set dst (LShiftVI src shift)); 8556 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 8557 ins_encode %{ 8558 int vector_len = 1; 8559 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8560 %} 8561 ins_pipe( pipe_slow ); 8562 %} 8563 8564 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8565 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8566 match(Set dst (LShiftVI src shift)); 8567 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 8568 ins_encode %{ 8569 int vector_len = 1; 8570 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8571 %} 8572 ins_pipe( pipe_slow ); 8573 %} 8574 8575 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8576 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8577 match(Set dst (LShiftVI src shift)); 8578 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 8579 ins_encode %{ 8580 int vector_len = 2; 8581 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8582 %} 8583 ins_pipe( pipe_slow ); 8584 %} 8585 8586 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8587 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8588 match(Set dst (LShiftVI src shift)); 8589 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 8590 ins_encode %{ 8591 int vector_len = 2; 8592 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8593 %} 8594 ins_pipe( pipe_slow ); 8595 %} 8596 8597 // Longs vector left shift 8598 instruct vsll2L(vecX dst, vecS shift) %{ 8599 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8600 match(Set dst (LShiftVL dst shift)); 8601 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 8602 ins_encode %{ 8603 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 8604 %} 8605 ins_pipe( pipe_slow ); 8606 %} 8607 8608 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 8609 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8610 match(Set dst (LShiftVL dst shift)); 8611 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 8612 ins_encode %{ 8613 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 8614 %} 8615 ins_pipe( pipe_slow ); 8616 %} 8617 8618 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 8619 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8620 match(Set dst (LShiftVL src shift)); 8621 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 8622 ins_encode %{ 8623 int vector_len = 0; 8624 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8625 %} 8626 ins_pipe( pipe_slow ); 8627 %} 8628 8629 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8630 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8631 match(Set dst (LShiftVL src shift)); 8632 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed2L" %} 8633 ins_encode %{ 8634 int vector_len = 0; 8635 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8636 %} 8637 ins_pipe( pipe_slow ); 8638 %} 8639 8640 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 8641 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8642 match(Set dst (LShiftVL src shift)); 8643 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 8644 ins_encode %{ 8645 int vector_len = 1; 8646 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8647 %} 8648 ins_pipe( pipe_slow ); 8649 %} 8650 8651 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8652 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8653 match(Set dst (LShiftVL src shift)); 8654 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 8655 ins_encode %{ 8656 int vector_len = 1; 8657 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8658 %} 8659 ins_pipe( pipe_slow ); 8660 %} 8661 8662 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 8663 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8664 match(Set dst (LShiftVL src shift)); 8665 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 8666 ins_encode %{ 8667 int vector_len = 2; 8668 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8669 %} 8670 ins_pipe( pipe_slow ); 8671 %} 8672 8673 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8674 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8675 match(Set dst (LShiftVL src shift)); 8676 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 8677 ins_encode %{ 8678 int vector_len = 2; 8679 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8680 %} 8681 ins_pipe( pipe_slow ); 8682 %} 8683 8684 // ----------------------- LogicalRightShift ----------------------------------- 8685 8686 // Shorts vector logical right shift produces incorrect Java result 8687 // for negative data because java code convert short value into int with 8688 // sign extension before a shift. But char vectors are fine since chars are 8689 // unsigned values. 8690 8691 instruct vsrl2S(vecS dst, vecS shift) %{ 8692 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8693 match(Set dst (URShiftVS dst shift)); 8694 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 8695 ins_encode %{ 8696 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8697 %} 8698 ins_pipe( pipe_slow ); 8699 %} 8700 8701 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 8702 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8703 match(Set dst (URShiftVS dst shift)); 8704 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 8705 ins_encode %{ 8706 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8707 %} 8708 ins_pipe( pipe_slow ); 8709 %} 8710 8711 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 8712 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8713 match(Set dst (URShiftVS src shift)); 8714 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 8715 ins_encode %{ 8716 int vector_len = 0; 8717 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8718 %} 8719 ins_pipe( pipe_slow ); 8720 %} 8721 8722 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8723 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8724 match(Set dst (URShiftVS src shift)); 8725 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed2S" %} 8726 ins_encode %{ 8727 int vector_len = 0; 8728 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8729 %} 8730 ins_pipe( pipe_slow ); 8731 %} 8732 8733 instruct vsrl4S(vecD dst, vecS shift) %{ 8734 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8735 match(Set dst (URShiftVS dst shift)); 8736 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 8737 ins_encode %{ 8738 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8739 %} 8740 ins_pipe( pipe_slow ); 8741 %} 8742 8743 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 8744 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8745 match(Set dst (URShiftVS dst shift)); 8746 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 8747 ins_encode %{ 8748 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8749 %} 8750 ins_pipe( pipe_slow ); 8751 %} 8752 8753 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 8754 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8755 match(Set dst (URShiftVS src shift)); 8756 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 8757 ins_encode %{ 8758 int vector_len = 0; 8759 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8760 %} 8761 ins_pipe( pipe_slow ); 8762 %} 8763 8764 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8765 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8766 match(Set dst (URShiftVS src shift)); 8767 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 8768 ins_encode %{ 8769 int vector_len = 0; 8770 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8771 %} 8772 ins_pipe( pipe_slow ); 8773 %} 8774 8775 instruct vsrl8S(vecX dst, vecS shift) %{ 8776 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8777 match(Set dst (URShiftVS dst shift)); 8778 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 8779 ins_encode %{ 8780 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8781 %} 8782 ins_pipe( pipe_slow ); 8783 %} 8784 8785 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 8786 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8787 match(Set dst (URShiftVS dst shift)); 8788 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 8789 ins_encode %{ 8790 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8791 %} 8792 ins_pipe( pipe_slow ); 8793 %} 8794 8795 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 8796 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8797 match(Set dst (URShiftVS src shift)); 8798 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 8799 ins_encode %{ 8800 int vector_len = 0; 8801 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8802 %} 8803 ins_pipe( pipe_slow ); 8804 %} 8805 8806 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8807 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8808 match(Set dst (URShiftVS src shift)); 8809 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 8810 ins_encode %{ 8811 int vector_len = 0; 8812 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8813 %} 8814 ins_pipe( pipe_slow ); 8815 %} 8816 8817 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 8818 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8819 match(Set dst (URShiftVS src shift)); 8820 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 8821 ins_encode %{ 8822 int vector_len = 1; 8823 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8824 %} 8825 ins_pipe( pipe_slow ); 8826 %} 8827 8828 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8829 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8830 match(Set dst (URShiftVS src shift)); 8831 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 8832 ins_encode %{ 8833 int vector_len = 1; 8834 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8835 %} 8836 ins_pipe( pipe_slow ); 8837 %} 8838 8839 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8840 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8841 match(Set dst (URShiftVS src shift)); 8842 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8843 ins_encode %{ 8844 int vector_len = 2; 8845 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8846 %} 8847 ins_pipe( pipe_slow ); 8848 %} 8849 8850 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8851 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8852 match(Set dst (URShiftVS src shift)); 8853 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8854 ins_encode %{ 8855 int vector_len = 2; 8856 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8857 %} 8858 ins_pipe( pipe_slow ); 8859 %} 8860 8861 // Integers vector logical right shift 8862 instruct vsrl2I(vecD dst, vecS shift) %{ 8863 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8864 match(Set dst (URShiftVI dst shift)); 8865 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8866 ins_encode %{ 8867 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8868 %} 8869 ins_pipe( pipe_slow ); 8870 %} 8871 8872 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 8873 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8874 match(Set dst (URShiftVI dst shift)); 8875 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8876 ins_encode %{ 8877 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8878 %} 8879 ins_pipe( pipe_slow ); 8880 %} 8881 8882 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 8883 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8884 match(Set dst (URShiftVI src shift)); 8885 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8886 ins_encode %{ 8887 int vector_len = 0; 8888 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8889 %} 8890 ins_pipe( pipe_slow ); 8891 %} 8892 8893 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8894 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8895 match(Set dst (URShiftVI src shift)); 8896 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8897 ins_encode %{ 8898 int vector_len = 0; 8899 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8900 %} 8901 ins_pipe( pipe_slow ); 8902 %} 8903 8904 instruct vsrl4I(vecX dst, vecS shift) %{ 8905 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8906 match(Set dst (URShiftVI dst shift)); 8907 format %{ "psrld $dst,$shift\t! 

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
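
// The arithmetic right shifts in the next section differ from the logical
// shifts above only for negative inputs: the sign bit is replicated into
// the vacated positions instead of zeroes. Per 32-bit lane, roughly:
//
//   int32_t  sra32(int32_t x, int n)  { return x >> n; }  // sign-fill, assuming
//                                                         // arithmetic >> on
//                                                         // signed int (as on x86)
//   uint32_t srl32(uint32_t x, int n) { return x >> n; }  // zero-fill
//
// (an illustrative C sketch, not code from this file)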

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
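
// SSE/AVX provide no packed 64-bit arithmetic shift before AVX-512's vpsraq,
// so RShiftVL has nothing to match here and long arithmetic right shifts
// are presumably left to scalar code. If it had to be emulated on top of the
// logical shift above, a standard per-lane identity is:
//
//   int64_t sra64(int64_t x, int n) {   // hypothetical helper, for
//     uint64_t t = (uint64_t)x >> n;    // illustration only: logical shift,
//     uint64_t m = 1ULL << (63 - n);    // position of the shifted sign bit,
//     return (int64_t)((t ^ m) - m);    // then re-extend the sign
//   }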

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Note: the predicate checks 8 bytes here; the operands are vecD and the
// original "== 4" did not match the 8-byte vectors this rule encodes.
instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
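
// The AND/OR/XOR rules above are keyed on length_in_bytes() rather than on
// element count: bitwise operations are element-size agnostic, so a single
// pand/por/pxor encoding serves byte, short, int and long vectors of the
// same total width.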

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
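
// The FmaVD/FmaVF rules above compute a * b + c fused, with a single
// rounding of the final result; no intermediate rounding of a * b occurs,
// unlike a separate multiply followed by an add. Per double lane this is
// roughly the behavior of:
//
//   double fma_lane(double a, double b, double c) {
//     return fma(a, b, c);  // C99 <math.h> fused multiply-add, one rounding
//   }
//
// (an illustrative C sketch of the per-lane semantics)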

// --------------------------------- Vector Multiply Add --------------------------------------

instruct smuladd4S2I_reg(vecD dst, vecD src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct smuladd8S4I_reg(vecX dst, vecX src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
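
// pmaddwd/vpmaddwd multiply adjacent pairs of signed 16-bit elements and
// sum each pair into one signed 32-bit lane; per output lane i, roughly:
//
//   dst[i] = (int32_t)src1[2*i]   * src2[2*i]
//          + (int32_t)src1[2*i+1] * src2[2*i+1];
//
// The AVX512-VNNI forms below (evpdpwssd) fold the accumulation into dst
// as well, i.e. dst[i] += the same pairwise sum.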

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
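
// vpopcntd (AVX512_VPOPCNTDQ, checked via supports_vpopcntdq() above) counts
// the set bits in each 32-bit lane; per lane the semantics are roughly:
//
//   int popcount32(uint32_t x) { int n = 0; while (x) { x &= x - 1; n++; } return n; }
//
// (an illustrative C sketch of the per-lane behavior, not code from this file)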