//
// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
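//
// For example, the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// reads: the low word of XMM0 is save-on-call for both the register
// allocator and the C calling convention, is spilled as a float
// (Op_RegF), and carries hardware encoding 0; the trailing expression
// names the VMReg slot that backs it.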
// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers are preserved across function calls;
//              XMM0-XMM7 might hold parameters.
// Windows ABI: XMM6-XMM31 preserved across function calls;
//              XMM0-XMM3 might hold parameters.
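//
// The per-word names line up with the vector widths used by the register
// classes further down: word a alone backs a 32-bit Float, words a-b a
// 64-bit Double, words a-d a 128-bit XMM vector, words a-h a 256-bit YMM
// vector, and words a-p a full 512-bit ZMM vector.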
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
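//
// Each reg_class_dynamic above names an EVEX class, a legacy fallback,
// and a %{ ... %} predicate: the class behaves as the first member when
// the predicate holds at runtime and as the second otherwise. In effect,
// float_reg spans XMM0-XMM31 on EVEX-capable CPUs and XMM0-XMM15
// elsewhere, while the _vl variant additionally requires AVX512VL, the
// extension that permits EVEX encodings at 128- and 256-bit vector
// lengths. The same pairing recurs for every class below.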
// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                           );

// Class for restricted 512bit vector registers
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
#endif
                             );

reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
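//
// A pattern worth noting before the fixed classes below: every *_legacy
// class stops at XMM15, the range reachable with REX prefixes, while its
// *_evex counterpart extends through XMM31. The xmmN_reg/ymmN_reg/zmmN_reg
// classes that follow each hold a single register at 128-, 256- and
// 512-bit width, for use by instructs that must be bound to one specific
// XMM register.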
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1102 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1103 1104 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1105 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1106 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1107 1108 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1109 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1110 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1111 1112 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1113 reg_class ymm3_reg(XMM3, XMM3b, 
XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1114 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1115 1116 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1117 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1118 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1119 1120 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1121 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1122 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1123 1124 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1125 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1126 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1127 1128 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1129 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1130 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1131 1132 #ifdef _LP64 1133 1134 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1135 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1136 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1137 1138 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1139 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1140 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1141 1142 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1143 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1144 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1145 1146 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1147 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1148 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1149 1150 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1151 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1152 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1153 1154 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1155 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1156 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1157 1158 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1159 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1160 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1161 1162 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1163 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1164 reg_class zmm15_reg(XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1165 1166 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1167 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1168 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1169 1170 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1171 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1172 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1173 1174 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1175 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1176 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1177 1178 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1179 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1180 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1181 1182 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1183 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1184 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1185 1186 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1187 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1188 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1189 1190 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1191 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1192 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1193 1194 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1195 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1196 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1197 1198 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1199 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1200 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1201 1202 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1203 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1204 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1205 1206 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1207 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1208 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1209 1210 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1211 reg_class ymm27_reg(XMM27, 
XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
1212 reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);
1213
1214 reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
1215 reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
1216 reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);
1217
1218 reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
1219 reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
1220 reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);
1221
1222 reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
1223 reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
1224 reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);
1225
1226 reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
1227 reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
1228 reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
1229
1230 #endif
1231
1232 %}
1233
1234
1235 //----------SOURCE BLOCK-------------------------------------------------------
1236 // This is a block of C++ code which provides values, functions, and
1237 // definitions necessary in the rest of the architecture description
1238
1239 source_hpp %{
1240 // Header information of the source block.
1241 // Method declarations/definitions which are used outside
1242 // the ad-scope can conveniently be defined here.
1243 //
1244 // To keep related declarations/definitions/uses close together,
1245 // we switch between source %{ }% and source_hpp %{ }% freely as needed.
1246
1247 class NativeJump;
1248
1249 class CallStubImpl {
1250
1251 //--------------------------------------------------------------
1252 //---< Used for optimization in Compile::shorten_branches >---
1253 //--------------------------------------------------------------
1254
1255 public:
1256 // Size of call trampoline stub.
1257 static uint size_call_trampoline() {
1258 return 0; // no call trampolines on this platform
1259 }
1260
1261 // number of relocations needed by a call trampoline stub
1262 static uint reloc_call_trampoline() {
1263 return 0; // no call trampolines on this platform
1264 }
1265 };
1266
1267 class HandlerImpl {
1268
1269 public:
1270
1271 static int emit_exception_handler(CodeBuffer &cbuf);
1272 static int emit_deopt_handler(CodeBuffer& cbuf);
1273
1274 static uint size_exception_handler() {
1275 // NativeCall instruction size is the same as NativeJump.
1276 // The exception handler starts out as a jump and can be patched to
1277 // a call by deoptimization. (4932387)
1278 // Note that this value is also credited (in output.cpp) to
1279 // the size of the code section.
1280 return NativeJump::instruction_size;
1281 }
1282
1283 #ifdef _LP64
1284 static uint size_deopt_handler() {
1285 // three 5-byte instructions plus one move for an unreachable address.
1286 return 15+3;
1287 }
1288 #else
1289 static uint size_deopt_handler() {
1290 // NativeCall instruction size is the same as NativeJump.
1291 // The exception handler starts out as a jump and can be patched to
1292 // a call by deoptimization. (4932387)
1293 // Note that this value is also credited (in output.cpp) to
1294 // the size of the code section.
1295 return 5 + NativeJump::instruction_size; // pushl(); jmp;
1296 }
1297 #endif
1298 };
1299
1300 %} // end source_hpp
1301
1302 source %{
1303
1304 #include "opto/addnode.hpp"
1305
1306 // Emit exception handler code.
1307 // Stuff framesize into a register and call a VM stub routine.
1308 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1309
1310 // Note that the code buffer's insts_mark is always relative to insts.
1311 // That's why we must use the macroassembler to generate a handler.
1312 MacroAssembler _masm(&cbuf);
1313 address base = __ start_a_stub(size_exception_handler());
1314 if (base == NULL) {
1315 ciEnv::current()->record_failure("CodeCache is full");
1316 return 0; // CodeBuffer::expand failed
1317 }
1318 int offset = __ offset();
1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1321 __ end_a_stub();
1322 return offset;
1323 }
1324
1325 // Emit deopt handler code.
1326 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1327
1328 // Note that the code buffer's insts_mark is always relative to insts.
1329 // That's why we must use the macroassembler to generate a handler.
1330 MacroAssembler _masm(&cbuf);
1331 address base = __ start_a_stub(size_deopt_handler());
1332 if (base == NULL) {
1333 ciEnv::current()->record_failure("CodeCache is full");
1334 return 0; // CodeBuffer::expand failed
1335 }
1336 int offset = __ offset();
1337
1338 #ifdef _LP64
1339 address the_pc = (address) __ pc();
1340 Label next;
1341 // push "the_pc" on the stack without destroying any registers,
1342 // as they all may be live.
1343
1344 // push address of "next"
1345 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1346 __ bind(next);
1347 // adjust it so it matches "the_pc"
1348 __ subptr(Address(rsp, 0), __ offset() - offset);
1349 #else
1350 InternalAddress here(__ pc());
1351 __ pushptr(here.addr());
1352 #endif
1353
1354 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1355 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
1356 __ end_a_stub();
1357 return offset;
1358 }
1359
1360
1361 //=============================================================================
1362
1363 // Float masks come from different places depending on platform.
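// (On LP64 they are StubRoutines::x86 constants; on 32-bit they come
// from local constant pools.) For illustration, the scalar AbsF/NegF
// rules further down in this file consume them as:
//   __ andps(dst, ExternalAddress(float_signmask()));  // clear the sign bit: |x|
//   __ xorps(dst, ExternalAddress(float_signflip()));  // flip the sign bit: -x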
1364 #ifdef _LP64
1365 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
1366 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
1367 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1368 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1369 #else
1370 static address float_signmask() { return (address)float_signmask_pool; }
1371 static address float_signflip() { return (address)float_signflip_pool; }
1372 static address double_signmask() { return (address)double_signmask_pool; }
1373 static address double_signflip() { return (address)double_signflip_pool; }
1374 #endif
1375
1376
1377 const bool Matcher::match_rule_supported(int opcode) {
1378 if (!has_match_rule(opcode))
1379 return false;
1380
1381 bool ret_value = true;
1382 switch (opcode) {
1383 case Op_PopCountI:
1384 case Op_PopCountL:
1385 if (!UsePopCountInstruction)
1386 ret_value = false;
1387 break;
1388 case Op_PopCountVI:
1389 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
1390 ret_value = false;
1391 break;
1392 case Op_MulVI:
1393 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
1394 ret_value = false;
1395 break;
1396 case Op_MulVL:
1397 case Op_MulReductionVL:
1398 if (VM_Version::supports_avx512dq() == false)
1399 ret_value = false;
1400 break;
1401 case Op_AddReductionVL:
1402 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
1403 ret_value = false;
1404 break;
1405 case Op_AddReductionVI:
1406 if (UseSSE < 3) // requires at least SSE3
1407 ret_value = false;
1408 break;
1409 case Op_MulReductionVI:
1410 if (UseSSE < 4) // requires at least SSE4
1411 ret_value = false;
1412 break;
1413 case Op_AddReductionVF:
1414 case Op_AddReductionVD:
1415 case Op_MulReductionVF:
1416 case Op_MulReductionVD:
1417 if (UseSSE < 1) // requires at least SSE
1418 ret_value = false;
1419 break;
1420 case Op_SqrtVD:
1421 case Op_SqrtVF:
1422 if (UseAVX < 1) // enabled for AVX only
1423 ret_value = false;
1424 break;
1425 case Op_CompareAndSwapL:
1426 #ifdef _LP64
1427 case Op_CompareAndSwapP:
1428 #endif
1429 if (!VM_Version::supports_cx8())
1430 ret_value = false;
1431 break;
1432 case Op_CMoveVF:
1433 case Op_CMoveVD:
1434 if (UseAVX < 1 || UseAVX > 2)
1435 ret_value = false;
1436 break;
1437 case Op_StrIndexOf:
1438 if (!UseSSE42Intrinsics)
1439 ret_value = false;
1440 break;
1441 case Op_StrIndexOfChar:
1442 if (!UseSSE42Intrinsics)
1443 ret_value = false;
1444 break;
1445 case Op_OnSpinWait:
1446 if (VM_Version::supports_on_spin_wait() == false)
1447 ret_value = false;
1448 break;
1449 }
1450
1451 return ret_value; // By default match rules are supported.
1452 }
1453
1454 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
1455 // identify extra cases that we might want to provide match rules for
1456 // e.g. Op_* vector nodes and other intrinsics while guarding with vlen
1457 bool ret_value = match_rule_supported(opcode);
1458 if (ret_value) {
1459 switch (opcode) {
1460 case Op_AddVB:
1461 case Op_SubVB:
1462 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
1463 ret_value = false;
1464 break;
1465 case Op_URShiftVS:
1466 case Op_RShiftVS:
1467 case Op_LShiftVS:
1468 case Op_MulVS:
1469 case Op_AddVS:
1470 case Op_SubVS:
1471 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
1472 ret_value = false;
1473 break;
1474 case Op_CMoveVF:
1475 if (vlen != 8)
1476 ret_value = false;
1477 break;
1478 case Op_CMoveVD:
1479 if (vlen != 4)
1480 ret_value = false;
1481 break;
1482 }
1483 }
1484
1485 return ret_value; // By default match rules are supported.
1486 }
1487
1488 const bool Matcher::has_predicated_vectors(void) {
1489 bool ret_value = false;
1490 if (UseAVX > 2) {
1491 ret_value = VM_Version::supports_avx512vl();
1492 }
1493
1494 return ret_value;
1495 }
1496
1497 const int Matcher::float_pressure(int default_pressure_threshold) {
1498 int float_pressure_threshold = default_pressure_threshold;
1499 #ifdef _LP64
1500 if (UseAVX > 2) {
1501 // Increase pressure threshold on machines with AVX3 which have
1502 // 2x more XMM registers.
1503 float_pressure_threshold = default_pressure_threshold * 2;
1504 }
1505 #endif
1506 return float_pressure_threshold;
1507 }
1508
1509 // Max vector size in bytes. 0 if not supported.
1510 const int Matcher::vector_width_in_bytes(BasicType bt) {
1511 assert(is_java_primitive(bt), "only primitive type vectors");
1512 if (UseSSE < 2) return 0;
1513 // SSE2 supports 128bit vectors for all types.
1514 // AVX2 supports 256bit vectors for all types.
1515 // EVEX supports 512bit vectors for all types.
1516 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
1517 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1518 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1519 size = (UseAVX > 2) ? 64 : 32;
1520 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
1521 size = (VM_Version::supports_avx512bw()) ? 64 : 32;
1522 // Use flag to limit vector size.
1523 size = MIN2(size,(int)MaxVectorSize);
1524 // Minimum 2 values in vector (or 4 for bytes).
1525 switch (bt) {
1526 case T_DOUBLE:
1527 case T_LONG:
1528 if (size < 16) return 0;
1529 break;
1530 case T_FLOAT:
1531 case T_INT:
1532 if (size < 8) return 0;
1533 break;
1534 case T_BOOLEAN:
1535 if (size < 4) return 0;
1536 break;
1537 case T_CHAR:
1538 if (size < 4) return 0;
1539 break;
1540 case T_BYTE:
1541 if (size < 4) return 0;
1542 break;
1543 case T_SHORT:
1544 if (size < 4) return 0;
1545 break;
1546 default:
1547 ShouldNotReachHere();
1548 }
1549 return size;
1550 }
1551
1552 // Limits on vector size (number of elements) loaded into vector.
1553 const int Matcher::max_vector_size(const BasicType bt) {
1554 return vector_width_in_bytes(bt)/type2aelembytes(bt);
1555 }
1556 const int Matcher::min_vector_size(const BasicType bt) {
1557 int max_size = max_vector_size(bt);
1558 // Min size which can be loaded into vector is 4 bytes.
1559 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1560 return MIN2(size,max_size);
1561 }
1562
1563 // Vector ideal reg corresponding to specified size in bytes
1564 const uint Matcher::vector_ideal_reg(int size) {
1565 assert(MaxVectorSize >= size, "");
1566 switch(size) {
1567 case 4: return Op_VecS;
1568 case 8: return Op_VecD;
1569 case 16: return Op_VecX;
1570 case 32: return Op_VecY;
1571 case 64: return Op_VecZ;
1572 }
1573 ShouldNotReachHere();
1574 return 0;
1575 }
1576
1577 // Only the lowest bits of an xmm reg are used for the vector shift count.
1578 const uint Matcher::vector_shift_count_ideal_reg(int size) {
1579 return Op_VecS;
1580 }
1581
1582 // x86 supports misaligned vector loads and stores.
1583 const bool Matcher::misaligned_vectors_ok() {
1584 return !AlignVector; // can be changed by flag
1585 }
1586
1587 // x86 AES instructions are compatible with SunJCE expanded
1588 // keys, hence we do not need to pass the original key to stubs
1589 const bool Matcher::pass_original_key_for_aes() {
1590 return false;
1591 }
1592
1593
1594 const bool Matcher::convi2l_type_required = true;
1595
1596 // Check for a shift by a small constant as well
1597 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
1598 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
1599 shift->in(2)->get_int() <= 3 &&
1600 // Are there other uses besides address expressions?
1601 !matcher->is_visited(shift)) {
1602 address_visited.set(shift->_idx); // Flag as address_visited
1603 mstack.push(shift->in(2), Matcher::Visit);
1604 Node *conv = shift->in(1);
1605 #ifdef _LP64
1606 // Allow the Matcher to match the rule which bypasses
1607 // the ConvI2L operation for an array index on LP64
1608 // if the index value is positive.
1609 if (conv->Opcode() == Op_ConvI2L &&
1610 conv->as_Type()->type()->is_long()->_lo >= 0 &&
1611 // Are there other uses besides address expressions?
1612 !matcher->is_visited(conv)) {
1613 address_visited.set(conv->_idx); // Flag as address_visited
1614 mstack.push(conv->in(1), Matcher::Pre_Visit);
1615 } else
1616 #endif
1617 mstack.push(conv, Matcher::Pre_Visit);
1618 return true;
1619 }
1620 return false;
1621 }
1622
1623 // Should the Matcher clone shifts on addressing modes, expecting them
1624 // to be subsumed into complex addressing expressions or compute them
1625 // into registers?
1626 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
1627 Node *off = m->in(AddPNode::Offset);
1628 if (off->is_Con()) {
1629 address_visited.test_set(m->_idx); // Flag as address_visited
1630 Node *adr = m->in(AddPNode::Address);
1631
1632 // Intel can handle 2 adds in addressing mode
1633 // AtomicAdd is not an addressing expression.
1634 // Cheap to find it by looking for screwy base.
1635 if (adr->is_AddP() &&
1636 !adr->in(AddPNode::Base)->is_top() &&
1637 // Are there other uses besides address expressions?
1638 !is_visited(adr)) { 1639 address_visited.set(adr->_idx); // Flag as address_visited 1640 Node *shift = adr->in(AddPNode::Offset); 1641 if (!clone_shift(shift, this, mstack, address_visited)) { 1642 mstack.push(shift, Pre_Visit); 1643 } 1644 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1645 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1646 } else { 1647 mstack.push(adr, Pre_Visit); 1648 } 1649 1650 // Clone X+offset as it also folds into most addressing expressions 1651 mstack.push(off, Visit); 1652 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1653 return true; 1654 } else if (clone_shift(off, this, mstack, address_visited)) { 1655 address_visited.test_set(m->_idx); // Flag as address_visited 1656 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1657 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1658 return true; 1659 } 1660 return false; 1661 } 1662 1663 void Compile::reshape_address(AddPNode* addp) { 1664 } 1665 1666 // Helper methods for MachSpillCopyNode::implementation(). 1667 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1668 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1669 // In 64-bit VM size calculation is very complex. Emitting instructions 1670 // into scratch buffer is used to get size in 64-bit VM. 1671 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1672 assert(ireg == Op_VecS || // 32bit vector 1673 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1674 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1675 "no non-adjacent vector moves" ); 1676 if (cbuf) { 1677 MacroAssembler _masm(cbuf); 1678 int offset = __ offset(); 1679 switch (ireg) { 1680 case Op_VecS: // copy whole register 1681 case Op_VecD: 1682 case Op_VecX: 1683 #ifndef LP64 1684 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1685 #else 1686 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1687 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1688 } else { 1689 __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2); 1690 __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1691 } 1692 #endif 1693 break; 1694 case Op_VecY: 1695 #ifndef LP64 1696 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1697 #else 1698 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1699 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1700 } else { 1701 __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2); 1702 __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1703 } 1704 #endif 1705 break; 1706 case Op_VecZ: 1707 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1708 break; 1709 default: 1710 ShouldNotReachHere(); 1711 } 1712 int size = __ offset() - offset; 1713 #ifdef ASSERT 1714 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
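// For example (illustrative, not emitted): a reg-to-reg movdqu is
// F3 0F 6F /r under SSE and C5 xx 6F /r with the 2-byte VEX prefix,
// four bytes either way, which is the size the assert below expects
// for the 32-bit VM.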
1715 assert(!do_size || size == 4, "incorrect size calculation");
1716 #endif
1717 return size;
1718 #ifndef PRODUCT
1719 } else if (!do_size) {
1720 switch (ireg) {
1721 case Op_VecS:
1722 case Op_VecD:
1723 case Op_VecX:
1724 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1725 break;
1726 case Op_VecY:
1727 case Op_VecZ:
1728 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1729 break;
1730 default:
1731 ShouldNotReachHere();
1732 }
1733 #endif
1734 }
1735 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1736 return (UseAVX > 2) ? 6 : 4;
1737 }
1738
1739 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1740 int stack_offset, int reg, uint ireg, outputStream* st) {
1741 // In 64-bit VM size calculation is very complex. Emitting instructions
1742 // into scratch buffer is used to get size in 64-bit VM.
1743 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1744 if (cbuf) {
1745 MacroAssembler _masm(cbuf);
1746 int offset = __ offset();
1747 if (is_load) {
1748 switch (ireg) {
1749 case Op_VecS:
1750 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1751 break;
1752 case Op_VecD:
1753 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1754 break;
1755 case Op_VecX:
1756 #ifndef LP64
1757 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1758 #else
1759 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1760 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1761 } else {
1762 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1763 __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
1764 }
1765 #endif
1766 break;
1767 case Op_VecY:
1768 #ifndef LP64
1769 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1770 #else
1771 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1772 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1773 } else {
1774 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1775 __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
1776 }
1777 #endif
1778 break;
1779 case Op_VecZ:
1780 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1781 break;
1782 default:
1783 ShouldNotReachHere();
1784 }
1785 } else { // store
1786 switch (ireg) {
1787 case Op_VecS:
1788 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1789 break;
1790 case Op_VecD:
1791 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1792 break;
1793 case Op_VecX:
1794 #ifndef LP64
1795 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1796 #else
1797 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1798 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1799 }
1800 else {
1801 __ vextracti32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
1802 }
1803 #endif
1804 break;
1805 case Op_VecY:
1806 #ifndef LP64
1807 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1808 #else
1809 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
1810 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1811 }
1812 else {
1813 __ vextracti64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
1814 }
1815 #endif
1816 break;
1817 case Op_VecZ:
1818 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1819 break;
1820 default:
1821 ShouldNotReachHere();
1822 }
1823 }
1824 int size = __ offset() - offset;
1825 #ifdef ASSERT
1826 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1827 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1828 assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1829 #endif
1830 return size;
1831 #ifndef PRODUCT
1832 } else if (!do_size) {
1833 if (is_load) {
1834 switch (ireg) {
1835 case Op_VecS:
1836 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1837 break;
1838 case Op_VecD:
1839 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1840 break;
1841 case Op_VecX:
1842 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1843 break;
1844 case Op_VecY:
1845 case Op_VecZ:
1846 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1847 break;
1848 default:
1849 ShouldNotReachHere();
1850 }
1851 } else { // store
1852 switch (ireg) {
1853 case Op_VecS:
1854 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1855 break;
1856 case Op_VecD:
1857 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1858 break;
1859 case Op_VecX:
1860 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1861 break;
1862 case Op_VecY:
1863 case Op_VecZ:
1864 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1865 break;
1866 default:
1867 ShouldNotReachHere();
1868 }
1869 }
1870 #endif
1871 }
1872 bool is_single_byte = false;
1873 int vec_len = 0;
1874 if ((UseAVX > 2) && (stack_offset != 0)) {
1875 int tuple_type = Assembler::EVEX_FVM;
1876 int input_size = Assembler::EVEX_32bit;
1877 switch (ireg) {
1878 case Op_VecS:
1879 tuple_type = Assembler::EVEX_T1S;
1880 break;
1881 case Op_VecD:
1882 tuple_type = Assembler::EVEX_T1S;
1883 input_size = Assembler::EVEX_64bit;
1884 break;
1885 case Op_VecX:
1886 break;
1887 case Op_VecY:
1888 vec_len = 1;
1889 break;
1890 case Op_VecZ:
1891 vec_len = 2;
1892 break;
1893 }
1894 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
1895 }
1896 int offset_size = 0;
1897 int size = 5;
1898 if (UseAVX > 2 ) {
1899 if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
1900 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
1901 size += 2; // Need an additional two bytes for EVEX encoding
1902 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
1903 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
1904 } else {
1905 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
1906 size += 2; // Need an additional two bytes for EVEX encoding
1907 }
1908 } else {
1909 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ?
1 : 4); 1910 } 1911 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1912 return size+offset_size; 1913 } 1914 1915 static inline jint replicate4_imm(int con, int width) { 1916 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1917 assert(width == 1 || width == 2, "only byte or short types here"); 1918 int bit_width = width * 8; 1919 jint val = con; 1920 val &= (1 << bit_width) - 1; // mask off sign bits 1921 while(bit_width < 32) { 1922 val |= (val << bit_width); 1923 bit_width <<= 1; 1924 } 1925 return val; 1926 } 1927 1928 static inline jlong replicate8_imm(int con, int width) { 1929 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 1930 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1931 int bit_width = width * 8; 1932 jlong val = con; 1933 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1934 while(bit_width < 64) { 1935 val |= (val << bit_width); 1936 bit_width <<= 1; 1937 } 1938 return val; 1939 } 1940 1941 #ifndef PRODUCT 1942 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1943 st->print("nop \t# %d bytes pad for loops and calls", _count); 1944 } 1945 #endif 1946 1947 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1948 MacroAssembler _masm(&cbuf); 1949 __ nop(_count); 1950 } 1951 1952 uint MachNopNode::size(PhaseRegAlloc*) const { 1953 return _count; 1954 } 1955 1956 #ifndef PRODUCT 1957 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1958 st->print("# breakpoint"); 1959 } 1960 #endif 1961 1962 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1963 MacroAssembler _masm(&cbuf); 1964 __ int3(); 1965 } 1966 1967 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1968 return MachNode::size(ra_); 1969 } 1970 1971 %} 1972 1973 encode %{ 1974 1975 enc_class call_epilog %{ 1976 if (VerifyStackAtCalls) { 1977 // Check that stack depth is unchanged: find majik cookie on stack 1978 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1979 MacroAssembler _masm(&cbuf); 1980 Label L; 1981 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1982 __ jccb(Assembler::equal, L); 1983 // Die if stack mismatch 1984 __ int3(); 1985 __ bind(L); 1986 } 1987 %} 1988 1989 %} 1990 1991 1992 //----------OPERANDS----------------------------------------------------------- 1993 // Operand definitions must precede instruction definitions for correct parsing 1994 // in the ADLC because operands constitute user defined types which are used in 1995 // instruction definitions. 
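// For example, the vecZ operand below ties the ideal type VecZ to the
// vectorz_reg register class: an instruct declared with vecZ operands
// can only match nodes of that ideal type, and the register allocator
// can only assign it registers drawn from vectorz_reg.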
1996 1997 operand vecZ() %{ 1998 constraint(ALLOC_IN_RC(vectorz_reg)); 1999 match(VecZ); 2000 2001 format %{ %} 2002 interface(REG_INTER); 2003 %} 2004 2005 operand legVecZ() %{ 2006 constraint(ALLOC_IN_RC(vectorz_reg_vl)); 2007 match(VecZ); 2008 2009 format %{ %} 2010 interface(REG_INTER); 2011 %} 2012 2013 // Comparison Code for FP conditional move 2014 operand cmpOp_vcmppd() %{ 2015 match(Bool); 2016 2017 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2018 n->as_Bool()->_test._test != BoolTest::no_overflow); 2019 format %{ "" %} 2020 interface(COND_INTER) %{ 2021 equal (0x0, "eq"); 2022 less (0x1, "lt"); 2023 less_equal (0x2, "le"); 2024 not_equal (0xC, "ne"); 2025 greater_equal(0xD, "ge"); 2026 greater (0xE, "gt"); 2027 //TODO cannot compile (adlc breaks) without two next lines with error: 2028 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2029 // equal' for overflow. 2030 overflow (0x20, "o"); // not really supported by the instruction 2031 no_overflow (0x21, "no"); // not really supported by the instruction 2032 %} 2033 %} 2034 2035 2036 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2037 2038 // ============================================================================ 2039 2040 instruct ShouldNotReachHere() %{ 2041 match(Halt); 2042 format %{ "ud2\t# ShouldNotReachHere" %} 2043 ins_encode %{ 2044 __ ud2(); 2045 %} 2046 ins_pipe(pipe_slow); 2047 %} 2048 2049 // =================================EVEX special=============================== 2050 2051 instruct setMask(rRegI dst, rRegI src) %{ 2052 predicate(Matcher::has_predicated_vectors()); 2053 match(Set dst (SetVectMaskI src)); 2054 effect(TEMP dst); 2055 format %{ "setvectmask $dst, $src" %} 2056 ins_encode %{ 2057 __ setvectmask($dst$$Register, $src$$Register); 2058 %} 2059 ins_pipe(pipe_slow); 2060 %} 2061 2062 // ============================================================================ 2063 2064 instruct addF_reg(regF dst, regF src) %{ 2065 predicate((UseSSE>=1) && (UseAVX == 0)); 2066 match(Set dst (AddF dst src)); 2067 2068 format %{ "addss $dst, $src" %} 2069 ins_cost(150); 2070 ins_encode %{ 2071 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2072 %} 2073 ins_pipe(pipe_slow); 2074 %} 2075 2076 instruct addF_mem(regF dst, memory src) %{ 2077 predicate((UseSSE>=1) && (UseAVX == 0)); 2078 match(Set dst (AddF dst (LoadF src))); 2079 2080 format %{ "addss $dst, $src" %} 2081 ins_cost(150); 2082 ins_encode %{ 2083 __ addss($dst$$XMMRegister, $src$$Address); 2084 %} 2085 ins_pipe(pipe_slow); 2086 %} 2087 2088 instruct addF_imm(regF dst, immF con) %{ 2089 predicate((UseSSE>=1) && (UseAVX == 0)); 2090 match(Set dst (AddF dst con)); 2091 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2092 ins_cost(150); 2093 ins_encode %{ 2094 __ addss($dst$$XMMRegister, $constantaddress($con)); 2095 %} 2096 ins_pipe(pipe_slow); 2097 %} 2098 2099 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2100 predicate(UseAVX > 0); 2101 match(Set dst (AddF src1 src2)); 2102 2103 format %{ "vaddss $dst, $src1, $src2" %} 2104 ins_cost(150); 2105 ins_encode %{ 2106 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2107 %} 2108 ins_pipe(pipe_slow); 2109 %} 2110 2111 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2112 predicate(UseAVX > 0); 2113 match(Set dst (AddF src1 (LoadF src2))); 2114 2115 format %{ "vaddss $dst, $src1, $src2" %} 2116 ins_cost(150); 2117 ins_encode %{ 2118 __ 
vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2119 %} 2120 ins_pipe(pipe_slow); 2121 %} 2122 2123 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2124 predicate(UseAVX > 0); 2125 match(Set dst (AddF src con)); 2126 2127 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2128 ins_cost(150); 2129 ins_encode %{ 2130 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2131 %} 2132 ins_pipe(pipe_slow); 2133 %} 2134 2135 instruct addD_reg(regD dst, regD src) %{ 2136 predicate((UseSSE>=2) && (UseAVX == 0)); 2137 match(Set dst (AddD dst src)); 2138 2139 format %{ "addsd $dst, $src" %} 2140 ins_cost(150); 2141 ins_encode %{ 2142 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2143 %} 2144 ins_pipe(pipe_slow); 2145 %} 2146 2147 instruct addD_mem(regD dst, memory src) %{ 2148 predicate((UseSSE>=2) && (UseAVX == 0)); 2149 match(Set dst (AddD dst (LoadD src))); 2150 2151 format %{ "addsd $dst, $src" %} 2152 ins_cost(150); 2153 ins_encode %{ 2154 __ addsd($dst$$XMMRegister, $src$$Address); 2155 %} 2156 ins_pipe(pipe_slow); 2157 %} 2158 2159 instruct addD_imm(regD dst, immD con) %{ 2160 predicate((UseSSE>=2) && (UseAVX == 0)); 2161 match(Set dst (AddD dst con)); 2162 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2163 ins_cost(150); 2164 ins_encode %{ 2165 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2166 %} 2167 ins_pipe(pipe_slow); 2168 %} 2169 2170 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2171 predicate(UseAVX > 0); 2172 match(Set dst (AddD src1 src2)); 2173 2174 format %{ "vaddsd $dst, $src1, $src2" %} 2175 ins_cost(150); 2176 ins_encode %{ 2177 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2178 %} 2179 ins_pipe(pipe_slow); 2180 %} 2181 2182 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2183 predicate(UseAVX > 0); 2184 match(Set dst (AddD src1 (LoadD src2))); 2185 2186 format %{ "vaddsd $dst, $src1, $src2" %} 2187 ins_cost(150); 2188 ins_encode %{ 2189 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2190 %} 2191 ins_pipe(pipe_slow); 2192 %} 2193 2194 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2195 predicate(UseAVX > 0); 2196 match(Set dst (AddD src con)); 2197 2198 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2199 ins_cost(150); 2200 ins_encode %{ 2201 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2202 %} 2203 ins_pipe(pipe_slow); 2204 %} 2205 2206 instruct subF_reg(regF dst, regF src) %{ 2207 predicate((UseSSE>=1) && (UseAVX == 0)); 2208 match(Set dst (SubF dst src)); 2209 2210 format %{ "subss $dst, $src" %} 2211 ins_cost(150); 2212 ins_encode %{ 2213 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2214 %} 2215 ins_pipe(pipe_slow); 2216 %} 2217 2218 instruct subF_mem(regF dst, memory src) %{ 2219 predicate((UseSSE>=1) && (UseAVX == 0)); 2220 match(Set dst (SubF dst (LoadF src))); 2221 2222 format %{ "subss $dst, $src" %} 2223 ins_cost(150); 2224 ins_encode %{ 2225 __ subss($dst$$XMMRegister, $src$$Address); 2226 %} 2227 ins_pipe(pipe_slow); 2228 %} 2229 2230 instruct subF_imm(regF dst, immF con) %{ 2231 predicate((UseSSE>=1) && (UseAVX == 0)); 2232 match(Set dst (SubF dst con)); 2233 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2234 ins_cost(150); 2235 ins_encode %{ 2236 __ subss($dst$$XMMRegister, $constantaddress($con)); 2237 %} 2238 ins_pipe(pipe_slow); 2239 %} 2240 
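// Note the pattern shared by the scalar FP rules in this file: the SSE
// forms (predicate UseAVX == 0) are destructive two-operand
// instructions, matching shapes like (SubF dst src) where dst is both
// an input and the result, while the AVX forms use the non-destructive
// three-operand VEX encoding (e.g. vsubss dst, src1, src2), leaving
// both inputs intact and giving the register allocator more freedom.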
2241 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2242 predicate(UseAVX > 0); 2243 match(Set dst (SubF src1 src2)); 2244 2245 format %{ "vsubss $dst, $src1, $src2" %} 2246 ins_cost(150); 2247 ins_encode %{ 2248 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2249 %} 2250 ins_pipe(pipe_slow); 2251 %} 2252 2253 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2254 predicate(UseAVX > 0); 2255 match(Set dst (SubF src1 (LoadF src2))); 2256 2257 format %{ "vsubss $dst, $src1, $src2" %} 2258 ins_cost(150); 2259 ins_encode %{ 2260 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2261 %} 2262 ins_pipe(pipe_slow); 2263 %} 2264 2265 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2266 predicate(UseAVX > 0); 2267 match(Set dst (SubF src con)); 2268 2269 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2270 ins_cost(150); 2271 ins_encode %{ 2272 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2273 %} 2274 ins_pipe(pipe_slow); 2275 %} 2276 2277 instruct subD_reg(regD dst, regD src) %{ 2278 predicate((UseSSE>=2) && (UseAVX == 0)); 2279 match(Set dst (SubD dst src)); 2280 2281 format %{ "subsd $dst, $src" %} 2282 ins_cost(150); 2283 ins_encode %{ 2284 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2285 %} 2286 ins_pipe(pipe_slow); 2287 %} 2288 2289 instruct subD_mem(regD dst, memory src) %{ 2290 predicate((UseSSE>=2) && (UseAVX == 0)); 2291 match(Set dst (SubD dst (LoadD src))); 2292 2293 format %{ "subsd $dst, $src" %} 2294 ins_cost(150); 2295 ins_encode %{ 2296 __ subsd($dst$$XMMRegister, $src$$Address); 2297 %} 2298 ins_pipe(pipe_slow); 2299 %} 2300 2301 instruct subD_imm(regD dst, immD con) %{ 2302 predicate((UseSSE>=2) && (UseAVX == 0)); 2303 match(Set dst (SubD dst con)); 2304 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2305 ins_cost(150); 2306 ins_encode %{ 2307 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2308 %} 2309 ins_pipe(pipe_slow); 2310 %} 2311 2312 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2313 predicate(UseAVX > 0); 2314 match(Set dst (SubD src1 src2)); 2315 2316 format %{ "vsubsd $dst, $src1, $src2" %} 2317 ins_cost(150); 2318 ins_encode %{ 2319 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2320 %} 2321 ins_pipe(pipe_slow); 2322 %} 2323 2324 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2325 predicate(UseAVX > 0); 2326 match(Set dst (SubD src1 (LoadD src2))); 2327 2328 format %{ "vsubsd $dst, $src1, $src2" %} 2329 ins_cost(150); 2330 ins_encode %{ 2331 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2332 %} 2333 ins_pipe(pipe_slow); 2334 %} 2335 2336 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2337 predicate(UseAVX > 0); 2338 match(Set dst (SubD src con)); 2339 2340 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2341 ins_cost(150); 2342 ins_encode %{ 2343 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2344 %} 2345 ins_pipe(pipe_slow); 2346 %} 2347 2348 instruct mulF_reg(regF dst, regF src) %{ 2349 predicate((UseSSE>=1) && (UseAVX == 0)); 2350 match(Set dst (MulF dst src)); 2351 2352 format %{ "mulss $dst, $src" %} 2353 ins_cost(150); 2354 ins_encode %{ 2355 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2356 %} 2357 ins_pipe(pipe_slow); 2358 %} 2359 2360 instruct mulF_mem(regF dst, memory src) %{ 2361 predicate((UseSSE>=1) && (UseAVX == 0)); 2362 
match(Set dst (MulF dst (LoadF src))); 2363 2364 format %{ "mulss $dst, $src" %} 2365 ins_cost(150); 2366 ins_encode %{ 2367 __ mulss($dst$$XMMRegister, $src$$Address); 2368 %} 2369 ins_pipe(pipe_slow); 2370 %} 2371 2372 instruct mulF_imm(regF dst, immF con) %{ 2373 predicate((UseSSE>=1) && (UseAVX == 0)); 2374 match(Set dst (MulF dst con)); 2375 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2376 ins_cost(150); 2377 ins_encode %{ 2378 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2379 %} 2380 ins_pipe(pipe_slow); 2381 %} 2382 2383 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2384 predicate(UseAVX > 0); 2385 match(Set dst (MulF src1 src2)); 2386 2387 format %{ "vmulss $dst, $src1, $src2" %} 2388 ins_cost(150); 2389 ins_encode %{ 2390 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2391 %} 2392 ins_pipe(pipe_slow); 2393 %} 2394 2395 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2396 predicate(UseAVX > 0); 2397 match(Set dst (MulF src1 (LoadF src2))); 2398 2399 format %{ "vmulss $dst, $src1, $src2" %} 2400 ins_cost(150); 2401 ins_encode %{ 2402 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2403 %} 2404 ins_pipe(pipe_slow); 2405 %} 2406 2407 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2408 predicate(UseAVX > 0); 2409 match(Set dst (MulF src con)); 2410 2411 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2412 ins_cost(150); 2413 ins_encode %{ 2414 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2415 %} 2416 ins_pipe(pipe_slow); 2417 %} 2418 2419 instruct mulD_reg(regD dst, regD src) %{ 2420 predicate((UseSSE>=2) && (UseAVX == 0)); 2421 match(Set dst (MulD dst src)); 2422 2423 format %{ "mulsd $dst, $src" %} 2424 ins_cost(150); 2425 ins_encode %{ 2426 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2427 %} 2428 ins_pipe(pipe_slow); 2429 %} 2430 2431 instruct mulD_mem(regD dst, memory src) %{ 2432 predicate((UseSSE>=2) && (UseAVX == 0)); 2433 match(Set dst (MulD dst (LoadD src))); 2434 2435 format %{ "mulsd $dst, $src" %} 2436 ins_cost(150); 2437 ins_encode %{ 2438 __ mulsd($dst$$XMMRegister, $src$$Address); 2439 %} 2440 ins_pipe(pipe_slow); 2441 %} 2442 2443 instruct mulD_imm(regD dst, immD con) %{ 2444 predicate((UseSSE>=2) && (UseAVX == 0)); 2445 match(Set dst (MulD dst con)); 2446 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2447 ins_cost(150); 2448 ins_encode %{ 2449 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2450 %} 2451 ins_pipe(pipe_slow); 2452 %} 2453 2454 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2455 predicate(UseAVX > 0); 2456 match(Set dst (MulD src1 src2)); 2457 2458 format %{ "vmulsd $dst, $src1, $src2" %} 2459 ins_cost(150); 2460 ins_encode %{ 2461 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2462 %} 2463 ins_pipe(pipe_slow); 2464 %} 2465 2466 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2467 predicate(UseAVX > 0); 2468 match(Set dst (MulD src1 (LoadD src2))); 2469 2470 format %{ "vmulsd $dst, $src1, $src2" %} 2471 ins_cost(150); 2472 ins_encode %{ 2473 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2474 %} 2475 ins_pipe(pipe_slow); 2476 %} 2477 2478 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2479 predicate(UseAVX > 0); 2480 match(Set dst (MulD src con)); 2481 2482 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" 
%} 2483 ins_cost(150); 2484 ins_encode %{ 2485 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2486 %} 2487 ins_pipe(pipe_slow); 2488 %} 2489 2490 instruct divF_reg(regF dst, regF src) %{ 2491 predicate((UseSSE>=1) && (UseAVX == 0)); 2492 match(Set dst (DivF dst src)); 2493 2494 format %{ "divss $dst, $src" %} 2495 ins_cost(150); 2496 ins_encode %{ 2497 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2498 %} 2499 ins_pipe(pipe_slow); 2500 %} 2501 2502 instruct divF_mem(regF dst, memory src) %{ 2503 predicate((UseSSE>=1) && (UseAVX == 0)); 2504 match(Set dst (DivF dst (LoadF src))); 2505 2506 format %{ "divss $dst, $src" %} 2507 ins_cost(150); 2508 ins_encode %{ 2509 __ divss($dst$$XMMRegister, $src$$Address); 2510 %} 2511 ins_pipe(pipe_slow); 2512 %} 2513 2514 instruct divF_imm(regF dst, immF con) %{ 2515 predicate((UseSSE>=1) && (UseAVX == 0)); 2516 match(Set dst (DivF dst con)); 2517 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2518 ins_cost(150); 2519 ins_encode %{ 2520 __ divss($dst$$XMMRegister, $constantaddress($con)); 2521 %} 2522 ins_pipe(pipe_slow); 2523 %} 2524 2525 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2526 predicate(UseAVX > 0); 2527 match(Set dst (DivF src1 src2)); 2528 2529 format %{ "vdivss $dst, $src1, $src2" %} 2530 ins_cost(150); 2531 ins_encode %{ 2532 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2533 %} 2534 ins_pipe(pipe_slow); 2535 %} 2536 2537 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2538 predicate(UseAVX > 0); 2539 match(Set dst (DivF src1 (LoadF src2))); 2540 2541 format %{ "vdivss $dst, $src1, $src2" %} 2542 ins_cost(150); 2543 ins_encode %{ 2544 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2545 %} 2546 ins_pipe(pipe_slow); 2547 %} 2548 2549 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2550 predicate(UseAVX > 0); 2551 match(Set dst (DivF src con)); 2552 2553 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2554 ins_cost(150); 2555 ins_encode %{ 2556 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2557 %} 2558 ins_pipe(pipe_slow); 2559 %} 2560 2561 instruct divD_reg(regD dst, regD src) %{ 2562 predicate((UseSSE>=2) && (UseAVX == 0)); 2563 match(Set dst (DivD dst src)); 2564 2565 format %{ "divsd $dst, $src" %} 2566 ins_cost(150); 2567 ins_encode %{ 2568 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2569 %} 2570 ins_pipe(pipe_slow); 2571 %} 2572 2573 instruct divD_mem(regD dst, memory src) %{ 2574 predicate((UseSSE>=2) && (UseAVX == 0)); 2575 match(Set dst (DivD dst (LoadD src))); 2576 2577 format %{ "divsd $dst, $src" %} 2578 ins_cost(150); 2579 ins_encode %{ 2580 __ divsd($dst$$XMMRegister, $src$$Address); 2581 %} 2582 ins_pipe(pipe_slow); 2583 %} 2584 2585 instruct divD_imm(regD dst, immD con) %{ 2586 predicate((UseSSE>=2) && (UseAVX == 0)); 2587 match(Set dst (DivD dst con)); 2588 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2589 ins_cost(150); 2590 ins_encode %{ 2591 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2592 %} 2593 ins_pipe(pipe_slow); 2594 %} 2595 2596 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2597 predicate(UseAVX > 0); 2598 match(Set dst (DivD src1 src2)); 2599 2600 format %{ "vdivsd $dst, $src1, $src2" %} 2601 ins_cost(150); 2602 ins_encode %{ 2603 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2604 %} 2605 ins_pipe(pipe_slow); 2606 %} 
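// The *_mem and *_reg_mem variants, such as divD_reg_mem below, fold a
// LoadF/LoadD straight into the arithmetic instruction as a memory
// operand (e.g. vdivsd dst, src1, [mem]), which saves a separate load
// and a temporary register when the loaded value has no other use.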
2607 2608 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2609 predicate(UseAVX > 0); 2610 match(Set dst (DivD src1 (LoadD src2))); 2611 2612 format %{ "vdivsd $dst, $src1, $src2" %} 2613 ins_cost(150); 2614 ins_encode %{ 2615 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2616 %} 2617 ins_pipe(pipe_slow); 2618 %} 2619 2620 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2621 predicate(UseAVX > 0); 2622 match(Set dst (DivD src con)); 2623 2624 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2625 ins_cost(150); 2626 ins_encode %{ 2627 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2628 %} 2629 ins_pipe(pipe_slow); 2630 %} 2631 2632 instruct absF_reg(regF dst) %{ 2633 predicate((UseSSE>=1) && (UseAVX == 0)); 2634 match(Set dst (AbsF dst)); 2635 ins_cost(150); 2636 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2637 ins_encode %{ 2638 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2639 %} 2640 ins_pipe(pipe_slow); 2641 %} 2642 2643 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2644 predicate(UseAVX > 0); 2645 match(Set dst (AbsF src)); 2646 ins_cost(150); 2647 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2648 ins_encode %{ 2649 int vector_len = 0; 2650 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2651 ExternalAddress(float_signmask()), vector_len); 2652 %} 2653 ins_pipe(pipe_slow); 2654 %} 2655 2656 instruct absD_reg(regD dst) %{ 2657 predicate((UseSSE>=2) && (UseAVX == 0)); 2658 match(Set dst (AbsD dst)); 2659 ins_cost(150); 2660 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2661 "# abs double by sign masking" %} 2662 ins_encode %{ 2663 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2664 %} 2665 ins_pipe(pipe_slow); 2666 %} 2667 2668 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2669 predicate(UseAVX > 0); 2670 match(Set dst (AbsD src)); 2671 ins_cost(150); 2672 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2673 "# abs double by sign masking" %} 2674 ins_encode %{ 2675 int vector_len = 0; 2676 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2677 ExternalAddress(double_signmask()), vector_len); 2678 %} 2679 ins_pipe(pipe_slow); 2680 %} 2681 2682 instruct negF_reg(regF dst) %{ 2683 predicate((UseSSE>=1) && (UseAVX == 0)); 2684 match(Set dst (NegF dst)); 2685 ins_cost(150); 2686 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2687 ins_encode %{ 2688 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2689 %} 2690 ins_pipe(pipe_slow); 2691 %} 2692 2693 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2694 predicate(UseAVX > 0); 2695 match(Set dst (NegF src)); 2696 ins_cost(150); 2697 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2698 ins_encode %{ 2699 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2700 ExternalAddress(float_signflip())); 2701 %} 2702 ins_pipe(pipe_slow); 2703 %} 2704 2705 instruct negD_reg(regD dst) %{ 2706 predicate((UseSSE>=2) && (UseAVX == 0)); 2707 match(Set dst (NegD dst)); 2708 ins_cost(150); 2709 format %{ "xorpd $dst, [0x8000000000000000]\t" 2710 "# neg double by sign flipping" %} 2711 ins_encode %{ 2712 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2713 %} 2714 ins_pipe(pipe_slow); 2715 %} 2716 2717 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 2718 predicate(UseAVX > 0); 2719 match(Set dst (NegD src)); 2720 ins_cost(150); 2721 format %{ "vnegatesd $dst, $src, 
[0x8000000000000000]\t" 2722 "# neg double by sign flipping" %} 2723 ins_encode %{ 2724 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2725 ExternalAddress(double_signflip())); 2726 %} 2727 ins_pipe(pipe_slow); 2728 %} 2729 2730 instruct sqrtF_reg(regF dst, regF src) %{ 2731 predicate(UseSSE>=1); 2732 match(Set dst (SqrtF src)); 2733 2734 format %{ "sqrtss $dst, $src" %} 2735 ins_cost(150); 2736 ins_encode %{ 2737 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2738 %} 2739 ins_pipe(pipe_slow); 2740 %} 2741 2742 instruct sqrtF_mem(regF dst, memory src) %{ 2743 predicate(UseSSE>=1); 2744 match(Set dst (SqrtF (LoadF src))); 2745 2746 format %{ "sqrtss $dst, $src" %} 2747 ins_cost(150); 2748 ins_encode %{ 2749 __ sqrtss($dst$$XMMRegister, $src$$Address); 2750 %} 2751 ins_pipe(pipe_slow); 2752 %} 2753 2754 instruct sqrtF_imm(regF dst, immF con) %{ 2755 predicate(UseSSE>=1); 2756 match(Set dst (SqrtF con)); 2757 2758 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2759 ins_cost(150); 2760 ins_encode %{ 2761 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2762 %} 2763 ins_pipe(pipe_slow); 2764 %} 2765 2766 instruct sqrtD_reg(regD dst, regD src) %{ 2767 predicate(UseSSE>=2); 2768 match(Set dst (SqrtD src)); 2769 2770 format %{ "sqrtsd $dst, $src" %} 2771 ins_cost(150); 2772 ins_encode %{ 2773 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2774 %} 2775 ins_pipe(pipe_slow); 2776 %} 2777 2778 instruct sqrtD_mem(regD dst, memory src) %{ 2779 predicate(UseSSE>=2); 2780 match(Set dst (SqrtD (LoadD src))); 2781 2782 format %{ "sqrtsd $dst, $src" %} 2783 ins_cost(150); 2784 ins_encode %{ 2785 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2786 %} 2787 ins_pipe(pipe_slow); 2788 %} 2789 2790 instruct sqrtD_imm(regD dst, immD con) %{ 2791 predicate(UseSSE>=2); 2792 match(Set dst (SqrtD con)); 2793 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2794 ins_cost(150); 2795 ins_encode %{ 2796 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2797 %} 2798 ins_pipe(pipe_slow); 2799 %} 2800 2801 instruct onspinwait() %{ 2802 match(OnSpinWait); 2803 ins_cost(200); 2804 2805 format %{ 2806 $$template 2807 $$emit$$"pause\t! membar_onspinwait" 2808 %} 2809 ins_encode %{ 2810 __ pause(); 2811 %} 2812 ins_pipe(pipe_slow); 2813 %} 2814 2815 // a * b + c 2816 instruct fmaD_reg(regD a, regD b, regD c) %{ 2817 predicate(UseFMA); 2818 match(Set c (FmaD c (Binary a b))); 2819 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2820 ins_cost(150); 2821 ins_encode %{ 2822 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2823 %} 2824 ins_pipe( pipe_slow ); 2825 %} 2826 2827 // a * b + c 2828 instruct fmaF_reg(regF a, regF b, regF c) %{ 2829 predicate(UseFMA); 2830 match(Set c (FmaF c (Binary a b))); 2831 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2832 ins_cost(150); 2833 ins_encode %{ 2834 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2835 %} 2836 ins_pipe( pipe_slow ); 2837 %} 2838 2839 // ====================VECTOR INSTRUCTIONS===================================== 2840 2841 2842 // Load vectors (4 bytes long) 2843 instruct loadV4(vecS dst, memory mem) %{ 2844 predicate(n->as_LoadVector()->memory_size() == 4); 2845 match(Set dst (LoadVector mem)); 2846 ins_cost(125); 2847 format %{ "movd $dst,$mem\t! 
load vector (4 bytes)" %}
2848 ins_encode %{
2849 __ movdl($dst$$XMMRegister, $mem$$Address);
2850 %}
2851 ins_pipe( pipe_slow );
2852 %}
2853
2854 // Move vectors (4 bytes long)
2855 instruct MoveVecS2Leg(legVecS dst, vecS src) %{
2856 match(Set dst src);
2857 format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
2858 ins_encode %{
2859 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
2860 %}
2861 ins_pipe( fpu_reg_reg );
2862 %}
2863
2864 // Move vectors (4 bytes long)
2865 instruct MoveLeg2VecS(vecS dst, legVecS src) %{
2866 match(Set dst src);
2867 format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
2868 ins_encode %{
2869 __ movflt($dst$$XMMRegister, $src$$XMMRegister);
2870 %}
2871 ins_pipe( fpu_reg_reg );
2872 %}
2873
2874 // Load vectors (8 bytes long)
2875 instruct loadV8(vecD dst, memory mem) %{
2876 predicate(n->as_LoadVector()->memory_size() == 8);
2877 match(Set dst (LoadVector mem));
2878 ins_cost(125);
2879 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
2880 ins_encode %{
2881 __ movq($dst$$XMMRegister, $mem$$Address);
2882 %}
2883 ins_pipe( pipe_slow );
2884 %}
2885
2886 // Move vectors (8 bytes long)
2887 instruct MoveVecD2Leg(legVecD dst, vecD src) %{
2888 match(Set dst src);
2889 format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
2890 ins_encode %{
2891 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
2892 %}
2893 ins_pipe( fpu_reg_reg );
2894 %}
2895
2896 // Move vectors (8 bytes long)
2897 instruct MoveLeg2VecD(vecD dst, legVecD src) %{
2898 match(Set dst src);
2899 format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
2900 ins_encode %{
2901 __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
2902 %}
2903 ins_pipe( fpu_reg_reg );
2904 %}
2905
2906 // Load vectors (16 bytes long)
2907 instruct loadV16(vecX dst, memory mem) %{
2908 predicate(n->as_LoadVector()->memory_size() == 16);
2909 match(Set dst (LoadVector mem));
2910 ins_cost(125);
2911 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
2912 ins_encode %{
2913 __ movdqu($dst$$XMMRegister, $mem$$Address);
2914 %}
2915 ins_pipe( pipe_slow );
2916 %}
2917
2918 // Move vectors (16 bytes long)
2919 instruct MoveVecX2Leg(legVecX dst, vecX src) %{
2920 match(Set dst src);
2921 format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
2922 ins_encode %{
2923 if (UseAVX < 2 || VM_Version::supports_avx512vl()) {
2924 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
2925 } else {
2926 int vector_len = 2;
2927 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
2928 }
2929 %}
2930 ins_pipe( fpu_reg_reg );
2931 %}
2932
2933 // Move vectors (16 bytes long)
2934 instruct MoveLeg2VecX(vecX dst, legVecX src) %{
2935 match(Set dst src);
2936 format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
2937 ins_encode %{
2938 if (UseAVX < 2 || VM_Version::supports_avx512vl()) {
2939 __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
2940 } else {
2941 int vector_len = 2;
2942 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
2943 }
2944 %}
2945 ins_pipe( fpu_reg_reg );
2946 %}
2947
2948 // Load vectors (32 bytes long)
2949 instruct loadV32(vecY dst, memory mem) %{
2950 predicate(n->as_LoadVector()->memory_size() == 32);
2951 match(Set dst (LoadVector mem));
2952 ins_cost(125);
2953 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
2954 ins_encode %{
2955 __ vmovdqu($dst$$XMMRegister, $mem$$Address);
2956 %}
2957 ins_pipe( pipe_slow );
2958 %}
2959
2960 // Move vectors (32 bytes long)
2961 instruct MoveVecY2Leg(legVecY dst, vecY src) %{
2962 match(Set dst src);
2963 format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
2964 ins_encode %{
2965 if (UseAVX < 2 || VM_Version::supports_avx512vl()) {
2966 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
2967 } else {
2968 int vector_len = 2;
2969 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
2970 }
2971 %}
2972 ins_pipe( fpu_reg_reg );
2973 %}
2974
2975 // Move vectors (32 bytes long)
2976 instruct MoveLeg2VecY(vecY dst, legVecY src) %{
2977 match(Set dst src);
2978 format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
2979 ins_encode %{
2980 if (UseAVX < 2 || VM_Version::supports_avx512vl()) {
2981 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
2982 } else {
2983 int vector_len = 2;
2984 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
2985 }
2986 %}
2987 ins_pipe( fpu_reg_reg );
2988 %}
2989
2990 // Load vectors (64 bytes long)
2991 instruct loadV64_dword(vecZ dst, memory mem) %{
2992 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
2993 match(Set dst (LoadVector mem));
2994 ins_cost(125);
2995 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
2996 ins_encode %{
2997 int vector_len = 2;
2998 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
2999 %}
3000 ins_pipe( pipe_slow );
3001 %}
3002
3003 // Load vectors (64 bytes long)
3004 instruct loadV64_qword(vecZ dst, memory mem) %{
3005 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
3006 match(Set dst (LoadVector mem));
3007 ins_cost(125);
3008 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
3009 ins_encode %{
3010 int vector_len = 2;
3011 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
3012 %}
3013 ins_pipe( pipe_slow );
3014 %}
3015
3016 instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
3017 match(Set dst src);
3018 format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
3019 ins_encode %{
3020 int vector_len = 2;
3021 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
3022 %}
3023 ins_pipe( fpu_reg_reg );
3024 %}
3025
3026 instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
3027 match(Set dst src);
3028 format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
3029 ins_encode %{
3030 int vector_len = 2;
3031 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
3032 %}
3033 ins_pipe( fpu_reg_reg );
3034 %}
3035
3036 // Store vectors
3037 instruct storeV4(memory mem, vecS src) %{
3038 predicate(n->as_StoreVector()->memory_size() == 4);
3039 match(Set mem (StoreVector mem src));
3040 ins_cost(145);
3041 format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
3042 ins_encode %{
3043 __ movdl($mem$$Address, $src$$XMMRegister);
3044 %}
3045 ins_pipe( pipe_slow );
3046 %}
3047
3048 instruct storeV8(memory mem, vecD src) %{
3049 predicate(n->as_StoreVector()->memory_size() == 8);
3050 match(Set mem (StoreVector mem src));
3051 ins_cost(145);
3052 format %{ "movq $mem,$src\t! 
store vector (8 bytes)" %} 3053 ins_encode %{ 3054 __ movq($mem$$Address, $src$$XMMRegister); 3055 %} 3056 ins_pipe( pipe_slow ); 3057 %} 3058 3059 instruct storeV16(memory mem, vecX src) %{ 3060 predicate(n->as_StoreVector()->memory_size() == 16); 3061 match(Set mem (StoreVector mem src)); 3062 ins_cost(145); 3063 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3064 ins_encode %{ 3065 __ movdqu($mem$$Address, $src$$XMMRegister); 3066 %} 3067 ins_pipe( pipe_slow ); 3068 %} 3069 3070 instruct storeV32(memory mem, vecY src) %{ 3071 predicate(n->as_StoreVector()->memory_size() == 32); 3072 match(Set mem (StoreVector mem src)); 3073 ins_cost(145); 3074 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 3075 ins_encode %{ 3076 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3077 %} 3078 ins_pipe( pipe_slow ); 3079 %} 3080 3081 instruct storeV64_dword(memory mem, vecZ src) %{ 3082 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 3083 match(Set mem (StoreVector mem src)); 3084 ins_cost(145); 3085 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 3086 ins_encode %{ 3087 int vector_len = 2; 3088 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3089 %} 3090 ins_pipe( pipe_slow ); 3091 %} 3092 3093 instruct storeV64_qword(memory mem, vecZ src) %{ 3094 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 3095 match(Set mem (StoreVector mem src)); 3096 ins_cost(145); 3097 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 3098 ins_encode %{ 3099 int vector_len = 2; 3100 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 3101 %} 3102 ins_pipe( pipe_slow ); 3103 %} 3104 3105 // ====================LEGACY REPLICATE======================================= 3106 3107 instruct Repl4B_mem(vecS dst, memory mem) %{ 3108 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3109 match(Set dst (ReplicateB (LoadB mem))); 3110 format %{ "punpcklbw $dst,$mem\n\t" 3111 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3112 ins_encode %{ 3113 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3114 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3115 %} 3116 ins_pipe( pipe_slow ); 3117 %} 3118 3119 instruct Repl8B_mem(vecD dst, memory mem) %{ 3120 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3121 match(Set dst (ReplicateB (LoadB mem))); 3122 format %{ "punpcklbw $dst,$mem\n\t" 3123 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3124 ins_encode %{ 3125 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3126 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3127 %} 3128 ins_pipe( pipe_slow ); 3129 %} 3130 3131 instruct Repl16B(vecX dst, rRegI src) %{ 3132 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3133 match(Set dst (ReplicateB src)); 3134 format %{ "movd $dst,$src\n\t" 3135 "punpcklbw $dst,$dst\n\t" 3136 "pshuflw $dst,$dst,0x00\n\t" 3137 "punpcklqdq $dst,$dst\t! 
replicate16B" %} 3138 ins_encode %{ 3139 __ movdl($dst$$XMMRegister, $src$$Register); 3140 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3141 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3142 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3143 %} 3144 ins_pipe( pipe_slow ); 3145 %} 3146 3147 instruct Repl16B_mem(vecX dst, memory mem) %{ 3148 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3149 match(Set dst (ReplicateB (LoadB mem))); 3150 format %{ "punpcklbw $dst,$mem\n\t" 3151 "pshuflw $dst,$dst,0x00\n\t" 3152 "punpcklqdq $dst,$dst\t! replicate16B" %} 3153 ins_encode %{ 3154 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3155 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3156 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3157 %} 3158 ins_pipe( pipe_slow ); 3159 %} 3160 3161 instruct Repl32B(vecY dst, rRegI src) %{ 3162 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3163 match(Set dst (ReplicateB src)); 3164 format %{ "movd $dst,$src\n\t" 3165 "punpcklbw $dst,$dst\n\t" 3166 "pshuflw $dst,$dst,0x00\n\t" 3167 "punpcklqdq $dst,$dst\n\t" 3168 "vinserti128_high $dst,$dst\t! replicate32B" %} 3169 ins_encode %{ 3170 __ movdl($dst$$XMMRegister, $src$$Register); 3171 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3172 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3173 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3174 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3175 %} 3176 ins_pipe( pipe_slow ); 3177 %} 3178 3179 instruct Repl32B_mem(vecY dst, memory mem) %{ 3180 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3181 match(Set dst (ReplicateB (LoadB mem))); 3182 format %{ "punpcklbw $dst,$mem\n\t" 3183 "pshuflw $dst,$dst,0x00\n\t" 3184 "punpcklqdq $dst,$dst\n\t" 3185 "vinserti128_high $dst,$dst\t! replicate32B" %} 3186 ins_encode %{ 3187 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3188 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3189 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3190 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3191 %} 3192 ins_pipe( pipe_slow ); 3193 %} 3194 3195 instruct Repl64B(legVecZ dst, rRegI src) %{ 3196 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3197 match(Set dst (ReplicateB src)); 3198 format %{ "movd $dst,$src\n\t" 3199 "punpcklbw $dst,$dst\n\t" 3200 "pshuflw $dst,$dst,0x00\n\t" 3201 "punpcklqdq $dst,$dst\n\t" 3202 "vinserti128_high $dst,$dst\t" 3203 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} 3204 ins_encode %{ 3205 __ movdl($dst$$XMMRegister, $src$$Register); 3206 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3207 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3208 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3209 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3210 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3211 %} 3212 ins_pipe( pipe_slow ); 3213 %} 3214 3215 instruct Repl64B_mem(legVecZ dst, memory mem) %{ 3216 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3217 match(Set dst (ReplicateB (LoadB mem))); 3218 format %{ "punpcklbw $dst,$mem\n\t" 3219 "pshuflw $dst,$dst,0x00\n\t" 3220 "punpcklqdq $dst,$dst\n\t" 3221 "vinserti128_high $dst,$dst\t" 3222 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate64B" %} 3223 ins_encode %{ 3224 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3225 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3226 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3227 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3228 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3229 %} 3230 ins_pipe( pipe_slow ); 3231 %} 3232 3233 instruct Repl16B_imm(vecX dst, immI con) %{ 3234 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3235 match(Set dst (ReplicateB con)); 3236 format %{ "movq $dst,[$constantaddress]\n\t" 3237 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3238 ins_encode %{ 3239 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3240 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3241 %} 3242 ins_pipe( pipe_slow ); 3243 %} 3244 3245 instruct Repl32B_imm(vecY dst, immI con) %{ 3246 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3247 match(Set dst (ReplicateB con)); 3248 format %{ "movq $dst,[$constantaddress]\n\t" 3249 "punpcklqdq $dst,$dst\n\t" 3250 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3251 ins_encode %{ 3252 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3253 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3254 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3255 %} 3256 ins_pipe( pipe_slow ); 3257 %} 3258 3259 instruct Repl64B_imm(legVecZ dst, immI con) %{ 3260 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3261 match(Set dst (ReplicateB con)); 3262 format %{ "movq $dst,[$constantaddress]\n\t" 3263 "punpcklqdq $dst,$dst\n\t" 3264 "vinserti128_high $dst,$dst\t" 3265 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %} 3266 ins_encode %{ 3267 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3268 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3269 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3270 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3271 %} 3272 ins_pipe( pipe_slow ); 3273 %} 3274 3275 instruct Repl4S(vecD dst, rRegI src) %{ 3276 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3277 match(Set dst (ReplicateS src)); 3278 format %{ "movd $dst,$src\n\t" 3279 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3280 ins_encode %{ 3281 __ movdl($dst$$XMMRegister, $src$$Register); 3282 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3283 %} 3284 ins_pipe( pipe_slow ); 3285 %} 3286 3287 instruct Repl4S_mem(vecD dst, memory mem) %{ 3288 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3289 match(Set dst (ReplicateS (LoadS mem))); 3290 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3291 ins_encode %{ 3292 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3293 %} 3294 ins_pipe( pipe_slow ); 3295 %} 3296 3297 instruct Repl8S(vecX dst, rRegI src) %{ 3298 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3299 match(Set dst (ReplicateS src)); 3300 format %{ "movd $dst,$src\n\t" 3301 "pshuflw $dst,$dst,0x00\n\t" 3302 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3303 ins_encode %{ 3304 __ movdl($dst$$XMMRegister, $src$$Register); 3305 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3306 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3307 %} 3308 ins_pipe( pipe_slow ); 3309 %} 3310 3311 instruct Repl8S_mem(vecX dst, memory mem) %{ 3312 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3313 match(Set dst (ReplicateS (LoadS mem))); 3314 format %{ "pshuflw $dst,$mem,0x00\n\t" 3315 "punpcklqdq $dst,$dst\t! replicate8S" %} 3316 ins_encode %{ 3317 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3318 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3319 %} 3320 ins_pipe( pipe_slow ); 3321 %} 3322 3323 instruct Repl8S_imm(vecX dst, immI con) %{ 3324 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3325 match(Set dst (ReplicateS con)); 3326 format %{ "movq $dst,[$constantaddress]\n\t" 3327 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3328 ins_encode %{ 3329 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3330 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3331 %} 3332 ins_pipe( pipe_slow ); 3333 %} 3334 3335 instruct Repl16S(vecY dst, rRegI src) %{ 3336 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3337 match(Set dst (ReplicateS src)); 3338 format %{ "movd $dst,$src\n\t" 3339 "pshuflw $dst,$dst,0x00\n\t" 3340 "punpcklqdq $dst,$dst\n\t" 3341 "vinserti128_high $dst,$dst\t! replicate16S" %} 3342 ins_encode %{ 3343 __ movdl($dst$$XMMRegister, $src$$Register); 3344 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3345 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3346 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3347 %} 3348 ins_pipe( pipe_slow ); 3349 %} 3350 3351 instruct Repl16S_mem(vecY dst, memory mem) %{ 3352 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3353 match(Set dst (ReplicateS (LoadS mem))); 3354 format %{ "pshuflw $dst,$mem,0x00\n\t" 3355 "punpcklqdq $dst,$dst\n\t" 3356 "vinserti128_high $dst,$dst\t! replicate16S" %} 3357 ins_encode %{ 3358 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3359 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3360 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3361 %} 3362 ins_pipe( pipe_slow ); 3363 %} 3364 3365 instruct Repl16S_imm(vecY dst, immI con) %{ 3366 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3367 match(Set dst (ReplicateS con)); 3368 format %{ "movq $dst,[$constantaddress]\n\t" 3369 "punpcklqdq $dst,$dst\n\t" 3370 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3371 ins_encode %{ 3372 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3373 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3374 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3375 %} 3376 ins_pipe( pipe_slow ); 3377 %} 3378 3379 instruct Repl32S(legVecZ dst, rRegI src) %{ 3380 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3381 match(Set dst (ReplicateS src)); 3382 format %{ "movd $dst,$src\n\t" 3383 "pshuflw $dst,$dst,0x00\n\t" 3384 "punpcklqdq $dst,$dst\n\t" 3385 "vinserti128_high $dst,$dst\t" 3386 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate32S" %} 3387 ins_encode %{ 3388 __ movdl($dst$$XMMRegister, $src$$Register); 3389 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3390 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3391 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3392 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3393 %} 3394 ins_pipe( pipe_slow ); 3395 %} 3396 3397 instruct Repl32S_mem(legVecZ dst, memory mem) %{ 3398 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3399 match(Set dst (ReplicateS (LoadS mem))); 3400 format %{ "pshuflw $dst,$mem,0x00\n\t" 3401 "punpcklqdq $dst,$dst\n\t" 3402 "vinserti128_high $dst,$dst\t" 3403 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3404 ins_encode %{ 3405 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3406 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3407 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3408 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3409 %} 3410 ins_pipe( pipe_slow ); 3411 %} 3412 3413 instruct Repl32S_imm(legVecZ dst, immI con) %{ 3414 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3415 match(Set dst (ReplicateS con)); 3416 format %{ "movq $dst,[$constantaddress]\n\t" 3417 "punpcklqdq $dst,$dst\n\t" 3418 "vinserti128_high $dst,$dst\t" 3419 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} 3420 ins_encode %{ 3421 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3422 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3423 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3424 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3425 %} 3426 ins_pipe( pipe_slow ); 3427 %} 3428 3429 instruct Repl4I(vecX dst, rRegI src) %{ 3430 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3431 match(Set dst (ReplicateI src)); 3432 format %{ "movd $dst,$src\n\t" 3433 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3434 ins_encode %{ 3435 __ movdl($dst$$XMMRegister, $src$$Register); 3436 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3437 %} 3438 ins_pipe( pipe_slow ); 3439 %} 3440 3441 instruct Repl4I_mem(vecX dst, memory mem) %{ 3442 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3443 match(Set dst (ReplicateI (LoadI mem))); 3444 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3445 ins_encode %{ 3446 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3447 %} 3448 ins_pipe( pipe_slow ); 3449 %} 3450 3451 instruct Repl8I(vecY dst, rRegI src) %{ 3452 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3453 match(Set dst (ReplicateI src)); 3454 format %{ "movd $dst,$src\n\t" 3455 "pshufd $dst,$dst,0x00\n\t" 3456 "vinserti128_high $dst,$dst\t! replicate8I" %} 3457 ins_encode %{ 3458 __ movdl($dst$$XMMRegister, $src$$Register); 3459 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3460 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3461 %} 3462 ins_pipe( pipe_slow ); 3463 %} 3464 3465 instruct Repl8I_mem(vecY dst, memory mem) %{ 3466 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3467 match(Set dst (ReplicateI (LoadI mem))); 3468 format %{ "pshufd $dst,$mem,0x00\n\t" 3469 "vinserti128_high $dst,$dst\t! 
replicate8I" %} 3470 ins_encode %{ 3471 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3472 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3473 %} 3474 ins_pipe( pipe_slow ); 3475 %} 3476 3477 instruct Repl16I(legVecZ dst, rRegI src) %{ 3478 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3479 match(Set dst (ReplicateI src)); 3480 format %{ "movd $dst,$src\n\t" 3481 "pshufd $dst,$dst,0x00\n\t" 3482 "vinserti128_high $dst,$dst\t" 3483 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3484 ins_encode %{ 3485 __ movdl($dst$$XMMRegister, $src$$Register); 3486 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3487 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3488 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3489 %} 3490 ins_pipe( pipe_slow ); 3491 %} 3492 3493 instruct Repl16I_mem(legVecZ dst, memory mem) %{ 3494 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3495 match(Set dst (ReplicateI (LoadI mem))); 3496 format %{ "pshufd $dst,$mem,0x00\n\t" 3497 "vinserti128_high $dst,$dst\t" 3498 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3499 ins_encode %{ 3500 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3501 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3502 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3503 %} 3504 ins_pipe( pipe_slow ); 3505 %} 3506 3507 instruct Repl4I_imm(vecX dst, immI con) %{ 3508 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3509 match(Set dst (ReplicateI con)); 3510 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3511 "punpcklqdq $dst,$dst" %} 3512 ins_encode %{ 3513 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3514 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3515 %} 3516 ins_pipe( pipe_slow ); 3517 %} 3518 3519 instruct Repl8I_imm(vecY dst, immI con) %{ 3520 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3521 match(Set dst (ReplicateI con)); 3522 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3523 "punpcklqdq $dst,$dst\n\t" 3524 "vinserti128_high $dst,$dst" %} 3525 ins_encode %{ 3526 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3527 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3528 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3529 %} 3530 ins_pipe( pipe_slow ); 3531 %} 3532 3533 instruct Repl16I_imm(legVecZ dst, immI con) %{ 3534 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3535 match(Set dst (ReplicateI con)); 3536 format %{ "movq $dst,[$constantaddress]\t" 3537 "punpcklqdq $dst,$dst\n\t" 3538 "vinserti128_high $dst,$dst" 3539 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %} 3540 ins_encode %{ 3541 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3542 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3543 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3544 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3545 %} 3546 ins_pipe( pipe_slow ); 3547 %} 3548 3549 // Long could be loaded into xmm register directly from memory. 3550 instruct Repl2L_mem(vecX dst, memory mem) %{ 3551 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3552 match(Set dst (ReplicateL (LoadL mem))); 3553 format %{ "movq $dst,$mem\n\t" 3554 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3555 ins_encode %{ 3556 __ movq($dst$$XMMRegister, $mem$$Address); 3557 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3558 %} 3559 ins_pipe( pipe_slow ); 3560 %} 3561 3562 // Replicate long (8 byte) scalar to be vector 3563 #ifdef _LP64 3564 instruct Repl4L(vecY dst, rRegL src) %{ 3565 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3566 match(Set dst (ReplicateL src)); 3567 format %{ "movdq $dst,$src\n\t" 3568 "punpcklqdq $dst,$dst\n\t" 3569 "vinserti128_high $dst,$dst\t! replicate4L" %} 3570 ins_encode %{ 3571 __ movdq($dst$$XMMRegister, $src$$Register); 3572 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3573 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3574 %} 3575 ins_pipe( pipe_slow ); 3576 %} 3577 3578 instruct Repl8L(legVecZ dst, rRegL src) %{ 3579 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3580 match(Set dst (ReplicateL src)); 3581 format %{ "movdq $dst,$src\n\t" 3582 "punpcklqdq $dst,$dst\n\t" 3583 "vinserti128_high $dst,$dst\t" 3584 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3585 ins_encode %{ 3586 __ movdq($dst$$XMMRegister, $src$$Register); 3587 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3588 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3589 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3590 %} 3591 ins_pipe( pipe_slow ); 3592 %} 3593 #else // _LP64 3594 instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{ 3595 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3596 match(Set dst (ReplicateL src)); 3597 effect(TEMP dst, USE src, TEMP tmp); 3598 format %{ "movdl $dst,$src.lo\n\t" 3599 "movdl $tmp,$src.hi\n\t" 3600 "punpckldq $dst,$tmp\n\t" 3601 "punpcklqdq $dst,$dst\n\t" 3602 "vinserti128_high $dst,$dst\t! replicate4L" %} 3603 ins_encode %{ 3604 __ movdl($dst$$XMMRegister, $src$$Register); 3605 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3606 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3607 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3608 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3609 %} 3610 ins_pipe( pipe_slow ); 3611 %} 3612 3613 instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{ 3614 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3615 match(Set dst (ReplicateL src)); 3616 effect(TEMP dst, USE src, TEMP tmp); 3617 format %{ "movdl $dst,$src.lo\n\t" 3618 "movdl $tmp,$src.hi\n\t" 3619 "punpckldq $dst,$tmp\n\t" 3620 "punpcklqdq $dst,$dst\n\t" 3621 "vinserti128_high $dst,$dst\t" 3622 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3623 ins_encode %{ 3624 __ movdl($dst$$XMMRegister, $src$$Register); 3625 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3626 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3627 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3628 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3629 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3630 %} 3631 ins_pipe( pipe_slow ); 3632 %} 3633 #endif // _LP64 3634 3635 instruct Repl4L_imm(vecY dst, immL con) %{ 3636 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3637 match(Set dst (ReplicateL con)); 3638 format %{ "movq $dst,[$constantaddress]\n\t" 3639 "punpcklqdq $dst,$dst\n\t" 3640 "vinserti128_high $dst,$dst\t! 
replicate4L($con)" %} 3641 ins_encode %{ 3642 __ movq($dst$$XMMRegister, $constantaddress($con)); 3643 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3644 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3645 %} 3646 ins_pipe( pipe_slow ); 3647 %} 3648 3649 instruct Repl8L_imm(legVecZ dst, immL con) %{ 3650 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3651 match(Set dst (ReplicateL con)); 3652 format %{ "movq $dst,[$constantaddress]\n\t" 3653 "punpcklqdq $dst,$dst\n\t" 3654 "vinserti128_high $dst,$dst\t" 3655 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %} 3656 ins_encode %{ 3657 __ movq($dst$$XMMRegister, $constantaddress($con)); 3658 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3659 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3660 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3661 %} 3662 ins_pipe( pipe_slow ); 3663 %} 3664 3665 instruct Repl4L_mem(vecY dst, memory mem) %{ 3666 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3667 match(Set dst (ReplicateL (LoadL mem))); 3668 format %{ "movq $dst,$mem\n\t" 3669 "punpcklqdq $dst,$dst\n\t" 3670 "vinserti128_high $dst,$dst\t! replicate4L" %} 3671 ins_encode %{ 3672 __ movq($dst$$XMMRegister, $mem$$Address); 3673 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3674 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3675 %} 3676 ins_pipe( pipe_slow ); 3677 %} 3678 3679 instruct Repl8L_mem(legVecZ dst, memory mem) %{ 3680 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3681 match(Set dst (ReplicateL (LoadL mem))); 3682 format %{ "movq $dst,$mem\n\t" 3683 "punpcklqdq $dst,$dst\n\t" 3684 "vinserti128_high $dst,$dst\t" 3685 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3686 ins_encode %{ 3687 __ movq($dst$$XMMRegister, $mem$$Address); 3688 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3689 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3690 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3691 %} 3692 ins_pipe( pipe_slow ); 3693 %} 3694 3695 instruct Repl2F_mem(vecD dst, memory mem) %{ 3696 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3697 match(Set dst (ReplicateF (LoadF mem))); 3698 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3699 ins_encode %{ 3700 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3701 %} 3702 ins_pipe( pipe_slow ); 3703 %} 3704 3705 instruct Repl4F_mem(vecX dst, memory mem) %{ 3706 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3707 match(Set dst (ReplicateF (LoadF mem))); 3708 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3709 ins_encode %{ 3710 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3711 %} 3712 ins_pipe( pipe_slow ); 3713 %} 3714 3715 instruct Repl8F(vecY dst, vlRegF src) %{ 3716 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3717 match(Set dst (ReplicateF src)); 3718 format %{ "pshufd $dst,$src,0x00\n\t" 3719 "vinsertf128_high $dst,$dst\t! 
replicate8F" %} 3720 ins_encode %{ 3721 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3722 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3723 %} 3724 ins_pipe( pipe_slow ); 3725 %} 3726 3727 instruct Repl8F_mem(vecY dst, memory mem) %{ 3728 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3729 match(Set dst (ReplicateF (LoadF mem))); 3730 format %{ "pshufd $dst,$mem,0x00\n\t" 3731 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3732 ins_encode %{ 3733 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3734 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3735 %} 3736 ins_pipe( pipe_slow ); 3737 %} 3738 3739 instruct Repl16F(legVecZ dst, vlRegF src) %{ 3740 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3741 match(Set dst (ReplicateF src)); 3742 format %{ "pshufd $dst,$src,0x00\n\t" 3743 "vinsertf128_high $dst,$dst\t" 3744 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3745 ins_encode %{ 3746 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3747 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3748 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3749 %} 3750 ins_pipe( pipe_slow ); 3751 %} 3752 3753 instruct Repl16F_mem(legVecZ dst, memory mem) %{ 3754 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3755 match(Set dst (ReplicateF (LoadF mem))); 3756 format %{ "pshufd $dst,$mem,0x00\n\t" 3757 "vinsertf128_high $dst,$dst\t" 3758 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3759 ins_encode %{ 3760 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3761 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3762 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3763 %} 3764 ins_pipe( pipe_slow ); 3765 %} 3766 3767 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3768 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3769 match(Set dst (ReplicateF zero)); 3770 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3771 ins_encode %{ 3772 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3773 %} 3774 ins_pipe( fpu_reg_reg ); 3775 %} 3776 3777 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3778 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3779 match(Set dst (ReplicateF zero)); 3780 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3781 ins_encode %{ 3782 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3783 %} 3784 ins_pipe( fpu_reg_reg ); 3785 %} 3786 3787 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3788 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3789 match(Set dst (ReplicateF zero)); 3790 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3791 ins_encode %{ 3792 int vector_len = 1; 3793 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3794 %} 3795 ins_pipe( fpu_reg_reg ); 3796 %} 3797 3798 instruct Repl2D_mem(vecX dst, memory mem) %{ 3799 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3800 match(Set dst (ReplicateD (LoadD mem))); 3801 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3802 ins_encode %{ 3803 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3804 %} 3805 ins_pipe( pipe_slow ); 3806 %} 3807 3808 instruct Repl4D(vecY dst, vlRegD src) %{ 3809 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3810 match(Set dst (ReplicateD src)); 3811 format %{ "pshufd $dst,$src,0x44\n\t" 3812 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3813 ins_encode %{ 3814 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3815 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 3820 instruct Repl4D_mem(vecY dst, memory mem) %{ 3821 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3822 match(Set dst (ReplicateD (LoadD mem))); 3823 format %{ "pshufd $dst,$mem,0x44\n\t" 3824 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3825 ins_encode %{ 3826 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3827 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3828 %} 3829 ins_pipe( pipe_slow ); 3830 %} 3831 3832 instruct Repl8D(legVecZ dst, vlRegD src) %{ 3833 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3834 match(Set dst (ReplicateD src)); 3835 format %{ "pshufd $dst,$src,0x44\n\t" 3836 "vinsertf128_high $dst,$dst\t" 3837 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3838 ins_encode %{ 3839 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3840 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3841 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3842 %} 3843 ins_pipe( pipe_slow ); 3844 %} 3845 3846 instruct Repl8D_mem(legVecZ dst, memory mem) %{ 3847 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3848 match(Set dst (ReplicateD (LoadD mem))); 3849 format %{ "pshufd $dst,$mem,0x44\n\t" 3850 "vinsertf128_high $dst,$dst\t" 3851 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3852 ins_encode %{ 3853 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3854 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3855 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3856 %} 3857 ins_pipe( pipe_slow ); 3858 %} 3859 3860 // Replicate double (8 byte) scalar zero to be vector 3861 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3862 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3863 match(Set dst (ReplicateD zero)); 3864 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3865 ins_encode %{ 3866 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3867 %} 3868 ins_pipe( fpu_reg_reg ); 3869 %} 3870 3871 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3872 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3873 match(Set dst (ReplicateD zero)); 3874 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3875 ins_encode %{ 3876 int vector_len = 1; 3877 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3878 %} 3879 ins_pipe( fpu_reg_reg ); 3880 %} 3881 3882 // ====================GENERIC REPLICATE========================================== 3883 3884 // Replicate byte scalar to be vector 3885 instruct Repl4B(vecS dst, rRegI src) %{ 3886 predicate(n->as_Vector()->length() == 4); 3887 match(Set dst (ReplicateB src)); 3888 format %{ "movd $dst,$src\n\t" 3889 "punpcklbw $dst,$dst\n\t" 3890 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3891 ins_encode %{ 3892 __ movdl($dst$$XMMRegister, $src$$Register); 3893 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3894 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3895 %} 3896 ins_pipe( pipe_slow ); 3897 %} 3898 3899 instruct Repl8B(vecD dst, rRegI src) %{ 3900 predicate(n->as_Vector()->length() == 8); 3901 match(Set dst (ReplicateB src)); 3902 format %{ "movd $dst,$src\n\t" 3903 "punpcklbw $dst,$dst\n\t" 3904 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3905 ins_encode %{ 3906 __ movdl($dst$$XMMRegister, $src$$Register); 3907 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3908 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3909 %} 3910 ins_pipe( pipe_slow ); 3911 %} 3912 3913 // Replicate byte scalar immediate to be vector by loading from const table. 3914 instruct Repl4B_imm(vecS dst, immI con) %{ 3915 predicate(n->as_Vector()->length() == 4); 3916 match(Set dst (ReplicateB con)); 3917 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3918 ins_encode %{ 3919 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3920 %} 3921 ins_pipe( pipe_slow ); 3922 %} 3923 3924 instruct Repl8B_imm(vecD dst, immI con) %{ 3925 predicate(n->as_Vector()->length() == 8); 3926 match(Set dst (ReplicateB con)); 3927 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3928 ins_encode %{ 3929 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3930 %} 3931 ins_pipe( pipe_slow ); 3932 %} 3933 3934 // Replicate byte scalar zero to be vector 3935 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3936 predicate(n->as_Vector()->length() == 4); 3937 match(Set dst (ReplicateB zero)); 3938 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3939 ins_encode %{ 3940 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3941 %} 3942 ins_pipe( fpu_reg_reg ); 3943 %} 3944 3945 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3946 predicate(n->as_Vector()->length() == 8); 3947 match(Set dst (ReplicateB zero)); 3948 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3949 ins_encode %{ 3950 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3951 %} 3952 ins_pipe( fpu_reg_reg ); 3953 %} 3954 3955 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3956 predicate(n->as_Vector()->length() == 16); 3957 match(Set dst (ReplicateB zero)); 3958 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3959 ins_encode %{ 3960 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3961 %} 3962 ins_pipe( fpu_reg_reg ); 3963 %} 3964 3965 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3966 predicate(n->as_Vector()->length() == 32); 3967 match(Set dst (ReplicateB zero)); 3968 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3969 ins_encode %{ 3970 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3971 int vector_len = 1; 3972 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3973 %} 3974 ins_pipe( fpu_reg_reg ); 3975 %} 3976 3977 // Replicate char/short (2 byte) scalar to be vector 3978 instruct Repl2S(vecS dst, rRegI src) %{ 3979 predicate(n->as_Vector()->length() == 2); 3980 match(Set dst (ReplicateS src)); 3981 format %{ "movd $dst,$src\n\t" 3982 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3983 ins_encode %{ 3984 __ movdl($dst$$XMMRegister, $src$$Register); 3985 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3986 %} 3987 ins_pipe( fpu_reg_reg ); 3988 %} 3989 3990 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3991 instruct Repl2S_imm(vecS dst, immI con) %{ 3992 predicate(n->as_Vector()->length() == 2); 3993 match(Set dst (ReplicateS con)); 3994 format %{ "movdl $dst,[$constantaddress]\t! 
replicate2S($con)" %} 3995 ins_encode %{ 3996 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 3997 %} 3998 ins_pipe( fpu_reg_reg ); 3999 %} 4000 4001 instruct Repl4S_imm(vecD dst, immI con) %{ 4002 predicate(n->as_Vector()->length() == 4); 4003 match(Set dst (ReplicateS con)); 4004 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 4005 ins_encode %{ 4006 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4007 %} 4008 ins_pipe( fpu_reg_reg ); 4009 %} 4010 4011 // Replicate char/short (2 byte) scalar zero to be vector 4012 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 4013 predicate(n->as_Vector()->length() == 2); 4014 match(Set dst (ReplicateS zero)); 4015 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 4016 ins_encode %{ 4017 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4018 %} 4019 ins_pipe( fpu_reg_reg ); 4020 %} 4021 4022 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 4023 predicate(n->as_Vector()->length() == 4); 4024 match(Set dst (ReplicateS zero)); 4025 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 4026 ins_encode %{ 4027 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4028 %} 4029 ins_pipe( fpu_reg_reg ); 4030 %} 4031 4032 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 4033 predicate(n->as_Vector()->length() == 8); 4034 match(Set dst (ReplicateS zero)); 4035 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 4036 ins_encode %{ 4037 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4038 %} 4039 ins_pipe( fpu_reg_reg ); 4040 %} 4041 4042 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 4043 predicate(n->as_Vector()->length() == 16); 4044 match(Set dst (ReplicateS zero)); 4045 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 4046 ins_encode %{ 4047 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4048 int vector_len = 1; 4049 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4050 %} 4051 ins_pipe( fpu_reg_reg ); 4052 %} 4053 4054 // Replicate integer (4 byte) scalar to be vector 4055 instruct Repl2I(vecD dst, rRegI src) %{ 4056 predicate(n->as_Vector()->length() == 2); 4057 match(Set dst (ReplicateI src)); 4058 format %{ "movd $dst,$src\n\t" 4059 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4060 ins_encode %{ 4061 __ movdl($dst$$XMMRegister, $src$$Register); 4062 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4063 %} 4064 ins_pipe( fpu_reg_reg ); 4065 %} 4066 4067 // Integer could be loaded into xmm register directly from memory. 4068 instruct Repl2I_mem(vecD dst, memory mem) %{ 4069 predicate(n->as_Vector()->length() == 2); 4070 match(Set dst (ReplicateI (LoadI mem))); 4071 format %{ "movd $dst,$mem\n\t" 4072 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4073 ins_encode %{ 4074 __ movdl($dst$$XMMRegister, $mem$$Address); 4075 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4076 %} 4077 ins_pipe( fpu_reg_reg ); 4078 %} 4079 4080 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 4081 instruct Repl2I_imm(vecD dst, immI con) %{ 4082 predicate(n->as_Vector()->length() == 2); 4083 match(Set dst (ReplicateI con)); 4084 format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %}
4085 ins_encode %{
4086 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4087 %}
4088 ins_pipe( fpu_reg_reg );
4089 %}
4090
4091 // Replicate integer (4 byte) scalar zero to be vector
4092 instruct Repl2I_zero(vecD dst, immI0 zero) %{
4093 predicate(n->as_Vector()->length() == 2);
4094 match(Set dst (ReplicateI zero));
4095 format %{ "pxor $dst,$dst\t! replicate2I zero" %}
4096 ins_encode %{
4097 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4098 %}
4099 ins_pipe( fpu_reg_reg );
4100 %}
4101
4102 instruct Repl4I_zero(vecX dst, immI0 zero) %{
4103 predicate(n->as_Vector()->length() == 4);
4104 match(Set dst (ReplicateI zero));
4105 format %{ "pxor $dst,$dst\t! replicate4I zero" %}
4106 ins_encode %{
4107 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4108 %}
4109 ins_pipe( fpu_reg_reg );
4110 %}
4111
4112 instruct Repl8I_zero(vecY dst, immI0 zero) %{
4113 predicate(n->as_Vector()->length() == 8);
4114 match(Set dst (ReplicateI zero));
4115 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
4116 ins_encode %{
4117 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
4118 int vector_len = 1;
4119 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4120 %}
4121 ins_pipe( fpu_reg_reg );
4122 %}
4123
4124 // Replicate long (8 byte) scalar to be vector
4125 #ifdef _LP64
4126 instruct Repl2L(vecX dst, rRegL src) %{
4127 predicate(n->as_Vector()->length() == 2);
4128 match(Set dst (ReplicateL src));
4129 format %{ "movdq $dst,$src\n\t"
4130 "punpcklqdq $dst,$dst\t! replicate2L" %}
4131 ins_encode %{
4132 __ movdq($dst$$XMMRegister, $src$$Register);
4133 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4134 %}
4135 ins_pipe( pipe_slow );
4136 %}
4137 #else // _LP64
4138 instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{
4139 predicate(n->as_Vector()->length() == 2);
4140 match(Set dst (ReplicateL src));
4141 effect(TEMP dst, USE src, TEMP tmp);
4142 format %{ "movdl $dst,$src.lo\n\t"
4143 "movdl $tmp,$src.hi\n\t"
4144 "punpckldq $dst,$tmp\n\t"
4145 "punpcklqdq $dst,$dst\t! replicate2L"%}
4146 ins_encode %{
4147 __ movdl($dst$$XMMRegister, $src$$Register);
4148 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4149 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4150 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4151 %}
4152 ins_pipe( pipe_slow );
4153 %}
4154 #endif // _LP64
4155
4156 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
4157 instruct Repl2L_imm(vecX dst, immL con) %{
4158 predicate(n->as_Vector()->length() == 2);
4159 match(Set dst (ReplicateL con));
4160 format %{ "movq $dst,[$constantaddress]\n\t"
4161 "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
4162 ins_encode %{
4163 __ movq($dst$$XMMRegister, $constantaddress($con));
4164 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4165 %}
4166 ins_pipe( pipe_slow );
4167 %}
4168
4169 // Replicate long (8 byte) scalar zero to be vector
4170 instruct Repl2L_zero(vecX dst, immL0 zero) %{
4171 predicate(n->as_Vector()->length() == 2);
4172 match(Set dst (ReplicateL zero));
4173 format %{ "pxor $dst,$dst\t! replicate2L zero" %}
4174 ins_encode %{
4175 __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4176 %}
4177 ins_pipe( fpu_reg_reg );
4178 %}
4179
4180 instruct Repl4L_zero(vecY dst, immL0 zero) %{
4181 predicate(n->as_Vector()->length() == 4);
4182 match(Set dst (ReplicateL zero));
4183 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
4184 ins_encode %{
4185 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
4186 int vector_len = 1;
4187 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4188 %}
4189 ins_pipe( fpu_reg_reg );
4190 %}
4191
4192 // Replicate float (4 byte) scalar to be vector
4193 instruct Repl2F(vecD dst, vlRegF src) %{
4194 predicate(n->as_Vector()->length() == 2);
4195 match(Set dst (ReplicateF src));
4196 format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
4197 ins_encode %{
4198 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
4199 %}
4200 ins_pipe( fpu_reg_reg );
4201 %}
4202
4203 instruct Repl4F(vecX dst, vlRegF src) %{
4204 predicate(n->as_Vector()->length() == 4);
4205 match(Set dst (ReplicateF src));
4206 format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
4207 ins_encode %{
4208 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
4209 %}
4210 ins_pipe( pipe_slow );
4211 %}
4212
4213 // Replicate double (8 bytes) scalar to be vector
4214 instruct Repl2D(vecX dst, vlRegD src) %{
4215 predicate(n->as_Vector()->length() == 2);
4216 match(Set dst (ReplicateD src));
4217 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
4218 ins_encode %{
4219 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
4220 %}
4221 ins_pipe( pipe_slow );
4222 %}
4223
4224 // ====================EVEX REPLICATE=============================================
4225
4226 instruct Repl4B_mem_evex(vecS dst, memory mem) %{
4227 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
4228 match(Set dst (ReplicateB (LoadB mem)));
4229 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
4230 ins_encode %{
4231 int vector_len = 0;
4232 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
4233 %}
4234 ins_pipe( pipe_slow );
4235 %}
4236
4237 instruct Repl8B_mem_evex(vecD dst, memory mem) %{
4238 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
4239 match(Set dst (ReplicateB (LoadB mem)));
4240 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
4241 ins_encode %{
4242 int vector_len = 0;
4243 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
4244 %}
4245 ins_pipe( pipe_slow );
4246 %}
4247
4248 instruct Repl16B_evex(vecX dst, rRegI src) %{
4249 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
4250 match(Set dst (ReplicateB src));
4251 format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
4252 ins_encode %{
4253 int vector_len = 0;
4254 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
4255 %}
4256 ins_pipe( pipe_slow );
4257 %}
4258
4259 instruct Repl16B_mem_evex(vecX dst, memory mem) %{
4260 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
4261 match(Set dst (ReplicateB (LoadB mem)));
4262 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
4263 ins_encode %{
4264 int vector_len = 0;
4265 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
4266 %}
4267 ins_pipe( pipe_slow );
4268 %}
4269
4270 instruct Repl32B_evex(vecY dst, rRegI src) %{
4271 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
4272 match(Set dst (ReplicateB src));
4273 format %{ "evpbroadcastb $dst,$src\t! 
replicate32B" %} 4274 ins_encode %{ 4275 int vector_len = 1; 4276 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4277 %} 4278 ins_pipe( pipe_slow ); 4279 %} 4280 4281 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4282 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4283 match(Set dst (ReplicateB (LoadB mem))); 4284 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4285 ins_encode %{ 4286 int vector_len = 1; 4287 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4288 %} 4289 ins_pipe( pipe_slow ); 4290 %} 4291 4292 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4293 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4294 match(Set dst (ReplicateB src)); 4295 format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %} 4296 ins_encode %{ 4297 int vector_len = 2; 4298 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4299 %} 4300 ins_pipe( pipe_slow ); 4301 %} 4302 4303 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4304 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4305 match(Set dst (ReplicateB (LoadB mem))); 4306 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4307 ins_encode %{ 4308 int vector_len = 2; 4309 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4310 %} 4311 ins_pipe( pipe_slow ); 4312 %} 4313 4314 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4315 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4316 match(Set dst (ReplicateB con)); 4317 format %{ "movq $dst,[$constantaddress]\n\t" 4318 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4319 ins_encode %{ 4320 int vector_len = 0; 4321 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4322 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4323 %} 4324 ins_pipe( pipe_slow ); 4325 %} 4326 4327 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4328 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4329 match(Set dst (ReplicateB con)); 4330 format %{ "movq $dst,[$constantaddress]\n\t" 4331 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4332 ins_encode %{ 4333 int vector_len = 1; 4334 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4335 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4336 %} 4337 ins_pipe( pipe_slow ); 4338 %} 4339 4340 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4341 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4342 match(Set dst (ReplicateB con)); 4343 format %{ "movq $dst,[$constantaddress]\n\t" 4344 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4345 ins_encode %{ 4346 int vector_len = 2; 4347 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4348 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4349 %} 4350 ins_pipe( pipe_slow ); 4351 %} 4352 4353 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4354 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4355 match(Set dst (ReplicateB zero)); 4356 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4357 ins_encode %{ 4358 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 
4359 int vector_len = 2; 4360 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4361 %} 4362 ins_pipe( fpu_reg_reg ); 4363 %} 4364 4365 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4366 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4367 match(Set dst (ReplicateS src)); 4368 format %{ "evpbroadcastw $dst,$src\t! replicate4S" %} 4369 ins_encode %{ 4370 int vector_len = 0; 4371 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4372 %} 4373 ins_pipe( pipe_slow ); 4374 %} 4375 4376 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4377 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4378 match(Set dst (ReplicateS (LoadS mem))); 4379 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4380 ins_encode %{ 4381 int vector_len = 0; 4382 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4383 %} 4384 ins_pipe( pipe_slow ); 4385 %} 4386 4387 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4388 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4389 match(Set dst (ReplicateS src)); 4390 format %{ "evpbroadcastw $dst,$src\t! replicate8S" %} 4391 ins_encode %{ 4392 int vector_len = 0; 4393 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4394 %} 4395 ins_pipe( pipe_slow ); 4396 %} 4397 4398 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4399 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4400 match(Set dst (ReplicateS (LoadS mem))); 4401 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4402 ins_encode %{ 4403 int vector_len = 0; 4404 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4405 %} 4406 ins_pipe( pipe_slow ); 4407 %} 4408 4409 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4410 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4411 match(Set dst (ReplicateS src)); 4412 format %{ "evpbroadcastw $dst,$src\t! replicate16S" %} 4413 ins_encode %{ 4414 int vector_len = 1; 4415 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4416 %} 4417 ins_pipe( pipe_slow ); 4418 %} 4419 4420 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4421 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4422 match(Set dst (ReplicateS (LoadS mem))); 4423 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4424 ins_encode %{ 4425 int vector_len = 1; 4426 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4427 %} 4428 ins_pipe( pipe_slow ); 4429 %} 4430 4431 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4432 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4433 match(Set dst (ReplicateS src)); 4434 format %{ "evpbroadcastw $dst,$src\t! replicate32S" %} 4435 ins_encode %{ 4436 int vector_len = 2; 4437 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4438 %} 4439 ins_pipe( pipe_slow ); 4440 %} 4441 4442 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4443 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4444 match(Set dst (ReplicateS (LoadS mem))); 4445 format %{ "vpbroadcastw $dst,$mem\t! 
replicate32S" %} 4446 ins_encode %{ 4447 int vector_len = 2; 4448 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4449 %} 4450 ins_pipe( pipe_slow ); 4451 %} 4452 4453 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4454 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4455 match(Set dst (ReplicateS con)); 4456 format %{ "movq $dst,[$constantaddress]\n\t" 4457 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4458 ins_encode %{ 4459 int vector_len = 0; 4460 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4461 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4462 %} 4463 ins_pipe( pipe_slow ); 4464 %} 4465 4466 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4467 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4468 match(Set dst (ReplicateS con)); 4469 format %{ "movq $dst,[$constantaddress]\n\t" 4470 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4471 ins_encode %{ 4472 int vector_len = 1; 4473 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4474 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4475 %} 4476 ins_pipe( pipe_slow ); 4477 %} 4478 4479 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4480 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4481 match(Set dst (ReplicateS con)); 4482 format %{ "movq $dst,[$constantaddress]\n\t" 4483 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4484 ins_encode %{ 4485 int vector_len = 2; 4486 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4487 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4488 %} 4489 ins_pipe( pipe_slow ); 4490 %} 4491 4492 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4493 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4494 match(Set dst (ReplicateS zero)); 4495 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4496 ins_encode %{ 4497 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4498 int vector_len = 2; 4499 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4500 %} 4501 ins_pipe( fpu_reg_reg ); 4502 %} 4503 4504 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4505 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4506 match(Set dst (ReplicateI src)); 4507 format %{ "evpbroadcastd $dst,$src\t! replicate4I" %} 4508 ins_encode %{ 4509 int vector_len = 0; 4510 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4511 %} 4512 ins_pipe( pipe_slow ); 4513 %} 4514 4515 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4516 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4517 match(Set dst (ReplicateI (LoadI mem))); 4518 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4519 ins_encode %{ 4520 int vector_len = 0; 4521 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4522 %} 4523 ins_pipe( pipe_slow ); 4524 %} 4525 4526 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4527 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4528 match(Set dst (ReplicateI src)); 4529 format %{ "evpbroadcastd $dst,$src\t! 
replicate8I" %} 4530 ins_encode %{ 4531 int vector_len = 1; 4532 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4533 %} 4534 ins_pipe( pipe_slow ); 4535 %} 4536 4537 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4538 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4539 match(Set dst (ReplicateI (LoadI mem))); 4540 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4541 ins_encode %{ 4542 int vector_len = 1; 4543 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4544 %} 4545 ins_pipe( pipe_slow ); 4546 %} 4547 4548 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4549 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4550 match(Set dst (ReplicateI src)); 4551 format %{ "evpbroadcastd $dst,$src\t! replicate16I" %} 4552 ins_encode %{ 4553 int vector_len = 2; 4554 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4555 %} 4556 ins_pipe( pipe_slow ); 4557 %} 4558 4559 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4560 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4561 match(Set dst (ReplicateI (LoadI mem))); 4562 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4563 ins_encode %{ 4564 int vector_len = 2; 4565 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4566 %} 4567 ins_pipe( pipe_slow ); 4568 %} 4569 4570 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4571 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4572 match(Set dst (ReplicateI con)); 4573 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4574 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4575 ins_encode %{ 4576 int vector_len = 0; 4577 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4578 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4579 %} 4580 ins_pipe( pipe_slow ); 4581 %} 4582 4583 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4584 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4585 match(Set dst (ReplicateI con)); 4586 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4587 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4588 ins_encode %{ 4589 int vector_len = 1; 4590 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4591 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4592 %} 4593 ins_pipe( pipe_slow ); 4594 %} 4595 4596 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4597 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4598 match(Set dst (ReplicateI con)); 4599 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4600 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4601 ins_encode %{ 4602 int vector_len = 2; 4603 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4604 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4605 %} 4606 ins_pipe( pipe_slow ); 4607 %} 4608 4609 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4610 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4611 match(Set dst (ReplicateI zero)); 4612 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4613 ins_encode %{ 4614 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 
4615 int vector_len = 2; 4616 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4617 %} 4618 ins_pipe( fpu_reg_reg ); 4619 %} 4620 4621 // Replicate long (8 byte) scalar to be vector 4622 #ifdef _LP64 4623 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4624 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4625 match(Set dst (ReplicateL src)); 4626 format %{ "evpbroadcastq $dst,$src\t! replicate4L" %} 4627 ins_encode %{ 4628 int vector_len = 1; 4629 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4630 %} 4631 ins_pipe( pipe_slow ); 4632 %} 4633 4634 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4635 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4636 match(Set dst (ReplicateL src)); 4637 format %{ "evpbroadcastq $dst,$src\t! replicate8L" %} 4638 ins_encode %{ 4639 int vector_len = 2; 4640 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4641 %} 4642 ins_pipe( pipe_slow ); 4643 %} 4644 #else // _LP64 4645 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4646 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4647 match(Set dst (ReplicateL src)); 4648 effect(TEMP dst, USE src, TEMP tmp); 4649 format %{ "movdl $dst,$src.lo\n\t" 4650 "movdl $tmp,$src.hi\n\t" 4651 "punpckldq $dst,$tmp\n\t" 4652 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4653 ins_encode %{ 4654 int vector_len = 1; 4655 __ movdl($dst$$XMMRegister, $src$$Register); 4656 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4657 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4658 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4659 %} 4660 ins_pipe( pipe_slow ); 4661 %} 4662 4663 instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{ 4664 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4665 match(Set dst (ReplicateL src)); 4666 effect(TEMP dst, USE src, TEMP tmp); 4667 format %{ "movdl $dst,$src.lo\n\t" 4668 "movdl $tmp,$src.hi\n\t" 4669 "punpckldq $dst,$tmp\n\t" 4670 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4671 ins_encode %{ 4672 int vector_len = 2; 4673 __ movdl($dst$$XMMRegister, $src$$Register); 4674 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4675 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4676 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4677 %} 4678 ins_pipe( pipe_slow ); 4679 %} 4680 #endif // _LP64 4681 4682 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4683 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4684 match(Set dst (ReplicateL con)); 4685 format %{ "movq $dst,[$constantaddress]\n\t" 4686 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4687 ins_encode %{ 4688 int vector_len = 1; 4689 __ movq($dst$$XMMRegister, $constantaddress($con)); 4690 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4691 %} 4692 ins_pipe( pipe_slow ); 4693 %} 4694 4695 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4696 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4697 match(Set dst (ReplicateL con)); 4698 format %{ "movq $dst,[$constantaddress]\n\t" 4699 "vpbroadcastq $dst,$dst\t! 
replicate8L" %} 4700 ins_encode %{ 4701 int vector_len = 2; 4702 __ movq($dst$$XMMRegister, $constantaddress($con)); 4703 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4704 %} 4705 ins_pipe( pipe_slow ); 4706 %} 4707 4708 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4709 predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4710 match(Set dst (ReplicateL (LoadL mem))); 4711 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4712 ins_encode %{ 4713 int vector_len = 0; 4714 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4715 %} 4716 ins_pipe( pipe_slow ); 4717 %} 4718 4719 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4720 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4721 match(Set dst (ReplicateL (LoadL mem))); 4722 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4723 ins_encode %{ 4724 int vector_len = 1; 4725 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4726 %} 4727 ins_pipe( pipe_slow ); 4728 %} 4729 4730 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4731 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4732 match(Set dst (ReplicateL (LoadL mem))); 4733 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4734 ins_encode %{ 4735 int vector_len = 2; 4736 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4737 %} 4738 ins_pipe( pipe_slow ); 4739 %} 4740 4741 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4742 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4743 match(Set dst (ReplicateL zero)); 4744 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4745 ins_encode %{ 4746 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4747 int vector_len = 2; 4748 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4749 %} 4750 ins_pipe( fpu_reg_reg ); 4751 %} 4752 4753 instruct Repl8F_evex(vecY dst, regF src) %{ 4754 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4755 match(Set dst (ReplicateF src)); 4756 format %{ "vpbroadcastss $dst,$src\t! replicate8F" %} 4757 ins_encode %{ 4758 int vector_len = 1; 4759 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4760 %} 4761 ins_pipe( pipe_slow ); 4762 %} 4763 4764 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4765 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4766 match(Set dst (ReplicateF (LoadF mem))); 4767 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4768 ins_encode %{ 4769 int vector_len = 1; 4770 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4771 %} 4772 ins_pipe( pipe_slow ); 4773 %} 4774 4775 instruct Repl16F_evex(vecZ dst, regF src) %{ 4776 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4777 match(Set dst (ReplicateF src)); 4778 format %{ "vpbroadcastss $dst,$src\t! replicate16F" %} 4779 ins_encode %{ 4780 int vector_len = 2; 4781 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4782 %} 4783 ins_pipe( pipe_slow ); 4784 %} 4785 4786 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4787 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4788 match(Set dst (ReplicateF (LoadF mem))); 4789 format %{ "vbroadcastss $dst,$mem\t! 
replicate16F" %} 4790 ins_encode %{ 4791 int vector_len = 2; 4792 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4793 %} 4794 ins_pipe( pipe_slow ); 4795 %} 4796 4797 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4798 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4799 match(Set dst (ReplicateF zero)); 4800 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4801 ins_encode %{ 4802 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4803 int vector_len = 2; 4804 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4805 %} 4806 ins_pipe( fpu_reg_reg ); 4807 %} 4808 4809 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4810 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4811 match(Set dst (ReplicateF zero)); 4812 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4813 ins_encode %{ 4814 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4815 int vector_len = 2; 4816 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4817 %} 4818 ins_pipe( fpu_reg_reg ); 4819 %} 4820 4821 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4822 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4823 match(Set dst (ReplicateF zero)); 4824 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4825 ins_encode %{ 4826 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4827 int vector_len = 2; 4828 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4829 %} 4830 ins_pipe( fpu_reg_reg ); 4831 %} 4832 4833 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4834 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4835 match(Set dst (ReplicateF zero)); 4836 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4837 ins_encode %{ 4838 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4839 int vector_len = 2; 4840 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4841 %} 4842 ins_pipe( fpu_reg_reg ); 4843 %} 4844 4845 instruct Repl4D_evex(vecY dst, regD src) %{ 4846 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4847 match(Set dst (ReplicateD src)); 4848 format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %} 4849 ins_encode %{ 4850 int vector_len = 1; 4851 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4852 %} 4853 ins_pipe( pipe_slow ); 4854 %} 4855 4856 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4857 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4858 match(Set dst (ReplicateD (LoadD mem))); 4859 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4860 ins_encode %{ 4861 int vector_len = 1; 4862 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4863 %} 4864 ins_pipe( pipe_slow ); 4865 %} 4866 4867 instruct Repl8D_evex(vecZ dst, regD src) %{ 4868 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4869 match(Set dst (ReplicateD src)); 4870 format %{ "vpbroadcastsd $dst,$src\t! 
replicate8D" %} 4871 ins_encode %{ 4872 int vector_len = 2; 4873 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4874 %} 4875 ins_pipe( pipe_slow ); 4876 %} 4877 4878 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4879 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4880 match(Set dst (ReplicateD (LoadD mem))); 4881 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4882 ins_encode %{ 4883 int vector_len = 2; 4884 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4885 %} 4886 ins_pipe( pipe_slow ); 4887 %} 4888 4889 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4890 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4891 match(Set dst (ReplicateD zero)); 4892 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4893 ins_encode %{ 4894 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4895 int vector_len = 2; 4896 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4897 %} 4898 ins_pipe( fpu_reg_reg ); 4899 %} 4900 4901 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4902 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4903 match(Set dst (ReplicateD zero)); 4904 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4905 ins_encode %{ 4906 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4907 int vector_len = 2; 4908 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4909 %} 4910 ins_pipe( fpu_reg_reg ); 4911 %} 4912 4913 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4914 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4915 match(Set dst (ReplicateD zero)); 4916 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4917 ins_encode %{ 4918 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4919 int vector_len = 2; 4920 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4921 %} 4922 ins_pipe( fpu_reg_reg ); 4923 %} 4924 4925 // ====================REDUCTION ARITHMETIC======================================= 4926 4927 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4928 predicate(UseSSE > 2 && UseAVX == 0); 4929 match(Set dst (AddReductionVI src1 src2)); 4930 effect(TEMP tmp2, TEMP tmp); 4931 format %{ "movdqu $tmp2,$src2\n\t" 4932 "phaddd $tmp2,$tmp2\n\t" 4933 "movd $tmp,$src1\n\t" 4934 "paddd $tmp,$tmp2\n\t" 4935 "movd $dst,$tmp\t! add reduction2I" %} 4936 ins_encode %{ 4937 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4938 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4939 __ movdl($tmp$$XMMRegister, $src1$$Register); 4940 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4941 __ movdl($dst$$Register, $tmp$$XMMRegister); 4942 %} 4943 ins_pipe( pipe_slow ); 4944 %} 4945 4946 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4947 predicate(VM_Version::supports_avxonly()); 4948 match(Set dst (AddReductionVI src1 src2)); 4949 effect(TEMP tmp, TEMP tmp2); 4950 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4951 "movd $tmp2,$src1\n\t" 4952 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4953 "movd $dst,$tmp2\t! 
add reduction2I" %} 4954 ins_encode %{ 4955 int vector_len = 0; 4956 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4957 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4958 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4959 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4960 %} 4961 ins_pipe( pipe_slow ); 4962 %} 4963 4964 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4965 predicate(UseAVX > 2); 4966 match(Set dst (AddReductionVI src1 src2)); 4967 effect(TEMP tmp, TEMP tmp2); 4968 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4969 "vpaddd $tmp,$src2,$tmp2\n\t" 4970 "movd $tmp2,$src1\n\t" 4971 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4972 "movd $dst,$tmp2\t! add reduction2I" %} 4973 ins_encode %{ 4974 int vector_len = 0; 4975 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4976 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4977 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4978 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4979 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4980 %} 4981 ins_pipe( pipe_slow ); 4982 %} 4983 4984 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 4985 predicate(UseSSE > 2 && UseAVX == 0); 4986 match(Set dst (AddReductionVI src1 src2)); 4987 effect(TEMP tmp, TEMP tmp2); 4988 format %{ "movdqu $tmp,$src2\n\t" 4989 "phaddd $tmp,$tmp\n\t" 4990 "phaddd $tmp,$tmp\n\t" 4991 "movd $tmp2,$src1\n\t" 4992 "paddd $tmp2,$tmp\n\t" 4993 "movd $dst,$tmp2\t! add reduction4I" %} 4994 ins_encode %{ 4995 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4996 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4997 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4998 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4999 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 5000 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5001 %} 5002 ins_pipe( pipe_slow ); 5003 %} 5004 5005 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5006 predicate(VM_Version::supports_avxonly()); 5007 match(Set dst (AddReductionVI src1 src2)); 5008 effect(TEMP tmp, TEMP tmp2); 5009 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5010 "vphaddd $tmp,$tmp,$tmp\n\t" 5011 "movd $tmp2,$src1\n\t" 5012 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5013 "movd $dst,$tmp2\t! add reduction4I" %} 5014 ins_encode %{ 5015 int vector_len = 0; 5016 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5017 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5018 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5019 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 5020 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5021 %} 5022 ins_pipe( pipe_slow ); 5023 %} 5024 5025 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5026 predicate(UseAVX > 2); 5027 match(Set dst (AddReductionVI src1 src2)); 5028 effect(TEMP tmp, TEMP tmp2); 5029 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5030 "vpaddd $tmp,$src2,$tmp2\n\t" 5031 "pshufd $tmp2,$tmp,0x1\n\t" 5032 "vpaddd $tmp,$tmp,$tmp2\n\t" 5033 "movd $tmp2,$src1\n\t" 5034 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5035 "movd $dst,$tmp2\t! 
add reduction4I" %} 5036 ins_encode %{ 5037 int vector_len = 0; 5038 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5039 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5040 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5041 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5042 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5043 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5044 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5045 %} 5046 ins_pipe( pipe_slow ); 5047 %} 5048 5049 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5050 predicate(VM_Version::supports_avxonly()); 5051 match(Set dst (AddReductionVI src1 src2)); 5052 effect(TEMP tmp, TEMP tmp2); 5053 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5054 "vphaddd $tmp,$tmp,$tmp2\n\t" 5055 "vextracti128_high $tmp2,$tmp\n\t" 5056 "vpaddd $tmp,$tmp,$tmp2\n\t" 5057 "movd $tmp2,$src1\n\t" 5058 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5059 "movd $dst,$tmp2\t! add reduction8I" %} 5060 ins_encode %{ 5061 int vector_len = 1; 5062 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5063 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5064 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 5065 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5066 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5067 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5068 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5069 %} 5070 ins_pipe( pipe_slow ); 5071 %} 5072 5073 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5074 predicate(UseAVX > 2); 5075 match(Set dst (AddReductionVI src1 src2)); 5076 effect(TEMP tmp, TEMP tmp2); 5077 format %{ "vextracti128_high $tmp,$src2\n\t" 5078 "vpaddd $tmp,$tmp,$src2\n\t" 5079 "pshufd $tmp2,$tmp,0xE\n\t" 5080 "vpaddd $tmp,$tmp,$tmp2\n\t" 5081 "pshufd $tmp2,$tmp,0x1\n\t" 5082 "vpaddd $tmp,$tmp,$tmp2\n\t" 5083 "movd $tmp2,$src1\n\t" 5084 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5085 "movd $dst,$tmp2\t! 
add reduction8I" %} 5086 ins_encode %{ 5087 int vector_len = 0; 5088 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5089 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5090 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5091 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5092 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5093 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5094 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5095 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5096 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5097 %} 5098 ins_pipe( pipe_slow ); 5099 %} 5100 5101 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5102 predicate(UseAVX > 2); 5103 match(Set dst (AddReductionVI src1 src2)); 5104 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5105 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5106 "vpaddd $tmp3,$tmp3,$src2\n\t" 5107 "vextracti128_high $tmp,$tmp3\n\t" 5108 "vpaddd $tmp,$tmp,$tmp3\n\t" 5109 "pshufd $tmp2,$tmp,0xE\n\t" 5110 "vpaddd $tmp,$tmp,$tmp2\n\t" 5111 "pshufd $tmp2,$tmp,0x1\n\t" 5112 "vpaddd $tmp,$tmp,$tmp2\n\t" 5113 "movd $tmp2,$src1\n\t" 5114 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5115 "movd $dst,$tmp2\t! mul reduction16I" %} 5116 ins_encode %{ 5117 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5118 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5119 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5120 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5121 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5122 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5123 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5124 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5125 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5126 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5127 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5128 %} 5129 ins_pipe( pipe_slow ); 5130 %} 5131 5132 #ifdef _LP64 5133 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5134 predicate(UseAVX > 2); 5135 match(Set dst (AddReductionVL src1 src2)); 5136 effect(TEMP tmp, TEMP tmp2); 5137 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5138 "vpaddq $tmp,$src2,$tmp2\n\t" 5139 "movdq $tmp2,$src1\n\t" 5140 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5141 "movdq $dst,$tmp2\t! add reduction2L" %} 5142 ins_encode %{ 5143 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5144 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5145 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5146 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5147 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5148 %} 5149 ins_pipe( pipe_slow ); 5150 %} 5151 5152 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5153 predicate(UseAVX > 2); 5154 match(Set dst (AddReductionVL src1 src2)); 5155 effect(TEMP tmp, TEMP tmp2); 5156 format %{ "vextracti128_high $tmp,$src2\n\t" 5157 "vpaddq $tmp2,$tmp,$src2\n\t" 5158 "pshufd $tmp,$tmp2,0xE\n\t" 5159 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5160 "movdq $tmp,$src1\n\t" 5161 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5162 "movdq $dst,$tmp2\t! 
add reduction4L" %} 5163 ins_encode %{ 5164 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5165 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5166 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5167 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5168 __ movdq($tmp$$XMMRegister, $src1$$Register); 5169 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5170 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5171 %} 5172 ins_pipe( pipe_slow ); 5173 %} 5174 5175 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5176 predicate(UseAVX > 2); 5177 match(Set dst (AddReductionVL src1 src2)); 5178 effect(TEMP tmp, TEMP tmp2); 5179 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5180 "vpaddq $tmp2,$tmp2,$src2\n\t" 5181 "vextracti128_high $tmp,$tmp2\n\t" 5182 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5183 "pshufd $tmp,$tmp2,0xE\n\t" 5184 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5185 "movdq $tmp,$src1\n\t" 5186 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5187 "movdq $dst,$tmp2\t! add reduction8L" %} 5188 ins_encode %{ 5189 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5190 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5191 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5192 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5193 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5194 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5195 __ movdq($tmp$$XMMRegister, $src1$$Register); 5196 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5197 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5198 %} 5199 ins_pipe( pipe_slow ); 5200 %} 5201 #endif 5202 5203 instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5204 predicate(UseSSE >= 1 && UseAVX == 0); 5205 match(Set dst (AddReductionVF dst src2)); 5206 effect(TEMP dst, TEMP tmp); 5207 format %{ "addss $dst,$src2\n\t" 5208 "pshufd $tmp,$src2,0x01\n\t" 5209 "addss $dst,$tmp\t! add reduction2F" %} 5210 ins_encode %{ 5211 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5212 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5213 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5214 %} 5215 ins_pipe( pipe_slow ); 5216 %} 5217 5218 instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5219 predicate(UseAVX > 0); 5220 match(Set dst (AddReductionVF dst src2)); 5221 effect(TEMP dst, TEMP tmp); 5222 format %{ "vaddss $dst,$dst,$src2\n\t" 5223 "pshufd $tmp,$src2,0x01\n\t" 5224 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5225 ins_encode %{ 5226 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5227 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5228 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5229 %} 5230 ins_pipe( pipe_slow ); 5231 %} 5232 5233 instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5234 predicate(UseSSE >= 1 && UseAVX == 0); 5235 match(Set dst (AddReductionVF dst src2)); 5236 effect(TEMP dst, TEMP tmp); 5237 format %{ "addss $dst,$src2\n\t" 5238 "pshufd $tmp,$src2,0x01\n\t" 5239 "addss $dst,$tmp\n\t" 5240 "pshufd $tmp,$src2,0x02\n\t" 5241 "addss $dst,$tmp\n\t" 5242 "pshufd $tmp,$src2,0x03\n\t" 5243 "addss $dst,$tmp\t! 
add reduction4F" %} 5244 ins_encode %{ 5245 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5246 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5247 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5248 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5249 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5250 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5251 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5252 %} 5253 ins_pipe( pipe_slow ); 5254 %} 5255 5256 instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5257 predicate(UseAVX > 0); 5258 match(Set dst (AddReductionVF dst src2)); 5259 effect(TEMP tmp, TEMP dst); 5260 format %{ "vaddss $dst,dst,$src2\n\t" 5261 "pshufd $tmp,$src2,0x01\n\t" 5262 "vaddss $dst,$dst,$tmp\n\t" 5263 "pshufd $tmp,$src2,0x02\n\t" 5264 "vaddss $dst,$dst,$tmp\n\t" 5265 "pshufd $tmp,$src2,0x03\n\t" 5266 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5267 ins_encode %{ 5268 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5269 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5270 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5271 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5272 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5273 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5274 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5275 %} 5276 ins_pipe( pipe_slow ); 5277 %} 5278 5279 instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{ 5280 predicate(UseAVX > 0); 5281 match(Set dst (AddReductionVF dst src2)); 5282 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5283 format %{ "vaddss $dst,$dst,$src2\n\t" 5284 "pshufd $tmp,$src2,0x01\n\t" 5285 "vaddss $dst,$dst,$tmp\n\t" 5286 "pshufd $tmp,$src2,0x02\n\t" 5287 "vaddss $dst,$dst,$tmp\n\t" 5288 "pshufd $tmp,$src2,0x03\n\t" 5289 "vaddss $dst,$dst,$tmp\n\t" 5290 "vextractf128_high $tmp2,$src2\n\t" 5291 "vaddss $dst,$dst,$tmp2\n\t" 5292 "pshufd $tmp,$tmp2,0x01\n\t" 5293 "vaddss $dst,$dst,$tmp\n\t" 5294 "pshufd $tmp,$tmp2,0x02\n\t" 5295 "vaddss $dst,$dst,$tmp\n\t" 5296 "pshufd $tmp,$tmp2,0x03\n\t" 5297 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5298 ins_encode %{ 5299 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5300 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5301 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5302 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5303 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5304 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5305 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5306 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5307 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5308 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5309 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5310 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5311 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5312 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5313 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5314 %} 5315 ins_pipe( pipe_slow ); 5316 %} 5317 5318 instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5319 predicate(UseAVX > 2); 5320 match(Set dst (AddReductionVF dst src2)); 5321 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5322 format %{ "vaddss $dst,$dst,$src2\n\t" 5323 "pshufd $tmp,$src2,0x01\n\t" 5324 "vaddss $dst,$dst,$tmp\n\t" 5325 "pshufd $tmp,$src2,0x02\n\t" 5326 "vaddss $dst,$dst,$tmp\n\t" 5327 "pshufd $tmp,$src2,0x03\n\t" 5328 "vaddss $dst,$dst,$tmp\n\t" 5329 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5330 "vaddss $dst,$dst,$tmp2\n\t" 5331 "pshufd $tmp,$tmp2,0x01\n\t" 5332 "vaddss $dst,$dst,$tmp\n\t" 5333 "pshufd $tmp,$tmp2,0x02\n\t" 5334 "vaddss $dst,$dst,$tmp\n\t" 5335 "pshufd $tmp,$tmp2,0x03\n\t" 5336 "vaddss $dst,$dst,$tmp\n\t" 5337 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5338 "vaddss $dst,$dst,$tmp2\n\t" 5339 "pshufd $tmp,$tmp2,0x01\n\t" 5340 "vaddss $dst,$dst,$tmp\n\t" 5341 "pshufd $tmp,$tmp2,0x02\n\t" 5342 "vaddss $dst,$dst,$tmp\n\t" 5343 "pshufd $tmp,$tmp2,0x03\n\t" 5344 "vaddss $dst,$dst,$tmp\n\t" 5345 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5346 "vaddss $dst,$dst,$tmp2\n\t" 5347 "pshufd $tmp,$tmp2,0x01\n\t" 5348 "vaddss $dst,$dst,$tmp\n\t" 5349 "pshufd $tmp,$tmp2,0x02\n\t" 5350 "vaddss $dst,$dst,$tmp\n\t" 5351 "pshufd $tmp,$tmp2,0x03\n\t" 5352 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5353 ins_encode %{ 5354 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5355 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5356 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5357 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5358 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5359 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5360 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5361 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5362 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5363 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5364 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5365 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5366 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5367 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5368 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5369 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5370 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5371 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5372 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5373 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5374 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5375 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5376 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5377 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5378 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5379 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5380 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5381 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5382 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5383 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5384 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5385 %} 5386 ins_pipe( pipe_slow ); 5387 %} 5388 5389 instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5390 predicate(UseSSE >= 1 && UseAVX == 0); 5391 match(Set dst (AddReductionVD dst src2)); 5392 effect(TEMP tmp, TEMP dst); 5393 format %{ "addsd $dst,$src2\n\t" 5394 "pshufd $tmp,$src2,0xE\n\t" 5395 "addsd $dst,$tmp\t! add reduction2D" %} 5396 ins_encode %{ 5397 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5398 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5399 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5400 %} 5401 ins_pipe( pipe_slow ); 5402 %} 5403 5404 instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5405 predicate(UseAVX > 0); 5406 match(Set dst (AddReductionVD dst src2)); 5407 effect(TEMP tmp, TEMP dst); 5408 format %{ "vaddsd $dst,$dst,$src2\n\t" 5409 "pshufd $tmp,$src2,0xE\n\t" 5410 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5411 ins_encode %{ 5412 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5413 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5414 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5415 %} 5416 ins_pipe( pipe_slow ); 5417 %} 5418 5419 instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{ 5420 predicate(UseAVX > 0); 5421 match(Set dst (AddReductionVD dst src2)); 5422 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5423 format %{ "vaddsd $dst,$dst,$src2\n\t" 5424 "pshufd $tmp,$src2,0xE\n\t" 5425 "vaddsd $dst,$dst,$tmp\n\t" 5426 "vextractf128 $tmp2,$src2,0x1\n\t" 5427 "vaddsd $dst,$dst,$tmp2\n\t" 5428 "pshufd $tmp,$tmp2,0xE\n\t" 5429 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5430 ins_encode %{ 5431 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5432 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5433 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5434 __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5435 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5436 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5437 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5438 %} 5439 ins_pipe( pipe_slow ); 5440 %} 5441 5442 instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5443 predicate(UseAVX > 2); 5444 match(Set dst (AddReductionVD dst src2)); 5445 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5446 format %{ "vaddsd $dst,$dst,$src2\n\t" 5447 "pshufd $tmp,$src2,0xE\n\t" 5448 "vaddsd $dst,$dst,$tmp\n\t" 5449 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5450 "vaddsd $dst,$dst,$tmp2\n\t" 5451 "pshufd $tmp,$tmp2,0xE\n\t" 5452 "vaddsd $dst,$dst,$tmp\n\t" 5453 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5454 "vaddsd $dst,$dst,$tmp2\n\t" 5455 "pshufd $tmp,$tmp2,0xE\n\t" 5456 "vaddsd $dst,$dst,$tmp\n\t" 5457 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5458 "vaddsd $dst,$dst,$tmp2\n\t" 5459 "pshufd $tmp,$tmp2,0xE\n\t" 5460 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5461 ins_encode %{ 5462 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5463 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5464 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5465 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5466 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5467 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5468 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5469 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5470 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5471 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5472 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5473 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5474 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5475 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5476 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5477 %} 5478 ins_pipe( pipe_slow ); 5479 %} 5480 5481 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5482 predicate(UseSSE > 3 && UseAVX == 0); 5483 match(Set dst (MulReductionVI src1 src2)); 5484 effect(TEMP tmp, TEMP tmp2); 5485 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5486 "pmulld $tmp2,$src2\n\t" 5487 "movd $tmp,$src1\n\t" 5488 "pmulld $tmp2,$tmp\n\t" 5489 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5490 ins_encode %{ 5491 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5492 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5493 __ movdl($tmp$$XMMRegister, $src1$$Register); 5494 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5495 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5496 %} 5497 ins_pipe( pipe_slow ); 5498 %} 5499 5500 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5501 predicate(UseAVX > 0); 5502 match(Set dst (MulReductionVI src1 src2)); 5503 effect(TEMP tmp, TEMP tmp2); 5504 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5505 "vpmulld $tmp,$src2,$tmp2\n\t" 5506 "movd $tmp2,$src1\n\t" 5507 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5508 "movd $dst,$tmp2\t! mul reduction2I" %} 5509 ins_encode %{ 5510 int vector_len = 0; 5511 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5512 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5513 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5514 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5515 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5516 %} 5517 ins_pipe( pipe_slow ); 5518 %} 5519 5520 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5521 predicate(UseSSE > 3 && UseAVX == 0); 5522 match(Set dst (MulReductionVI src1 src2)); 5523 effect(TEMP tmp, TEMP tmp2); 5524 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5525 "pmulld $tmp2,$src2\n\t" 5526 "pshufd $tmp,$tmp2,0x1\n\t" 5527 "pmulld $tmp2,$tmp\n\t" 5528 "movd $tmp,$src1\n\t" 5529 "pmulld $tmp2,$tmp\n\t" 5530 "movd $dst,$tmp2\t! mul reduction4I" %} 5531 ins_encode %{ 5532 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5533 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5534 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5535 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5536 __ movdl($tmp$$XMMRegister, $src1$$Register); 5537 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5538 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5539 %} 5540 ins_pipe( pipe_slow ); 5541 %} 5542 5543 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5544 predicate(UseAVX > 0); 5545 match(Set dst (MulReductionVI src1 src2)); 5546 effect(TEMP tmp, TEMP tmp2); 5547 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5548 "vpmulld $tmp,$src2,$tmp2\n\t" 5549 "pshufd $tmp2,$tmp,0x1\n\t" 5550 "vpmulld $tmp,$tmp,$tmp2\n\t" 5551 "movd $tmp2,$src1\n\t" 5552 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5553 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5554 ins_encode %{ 5555 int vector_len = 0; 5556 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5557 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5558 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5559 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5560 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5561 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5562 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5563 %} 5564 ins_pipe( pipe_slow ); 5565 %} 5566 5567 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5568 predicate(UseAVX > 1); 5569 match(Set dst (MulReductionVI src1 src2)); 5570 effect(TEMP tmp, TEMP tmp2); 5571 format %{ "vextracti128_high $tmp,$src2\n\t" 5572 "vpmulld $tmp,$tmp,$src2\n\t" 5573 "pshufd $tmp2,$tmp,0xE\n\t" 5574 "vpmulld $tmp,$tmp,$tmp2\n\t" 5575 "pshufd $tmp2,$tmp,0x1\n\t" 5576 "vpmulld $tmp,$tmp,$tmp2\n\t" 5577 "movd $tmp2,$src1\n\t" 5578 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5579 "movd $dst,$tmp2\t! mul reduction8I" %} 5580 ins_encode %{ 5581 int vector_len = 0; 5582 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5583 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5584 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5585 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5586 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5587 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5588 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5589 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5590 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5591 %} 5592 ins_pipe( pipe_slow ); 5593 %} 5594 5595 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5596 predicate(UseAVX > 2); 5597 match(Set dst (MulReductionVI src1 src2)); 5598 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5599 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5600 "vpmulld $tmp3,$tmp3,$src2\n\t" 5601 "vextracti128_high $tmp,$tmp3\n\t" 5602 "vpmulld $tmp,$tmp,$src2\n\t" 5603 "pshufd $tmp2,$tmp,0xE\n\t" 5604 "vpmulld $tmp,$tmp,$tmp2\n\t" 5605 "pshufd $tmp2,$tmp,0x1\n\t" 5606 "vpmulld $tmp,$tmp,$tmp2\n\t" 5607 "movd $tmp2,$src1\n\t" 5608 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5609 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5610 ins_encode %{ 5611 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5612 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5613 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5614 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5615 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5616 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5617 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5618 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5619 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5620 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5621 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5622 %} 5623 ins_pipe( pipe_slow ); 5624 %} 5625 5626 #ifdef _LP64 5627 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5628 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5629 match(Set dst (MulReductionVL src1 src2)); 5630 effect(TEMP tmp, TEMP tmp2); 5631 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5632 "vpmullq $tmp,$src2,$tmp2\n\t" 5633 "movdq $tmp2,$src1\n\t" 5634 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5635 "movdq $dst,$tmp2\t! mul reduction2L" %} 5636 ins_encode %{ 5637 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5638 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5639 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5640 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5641 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5642 %} 5643 ins_pipe( pipe_slow ); 5644 %} 5645 5646 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5647 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5648 match(Set dst (MulReductionVL src1 src2)); 5649 effect(TEMP tmp, TEMP tmp2); 5650 format %{ "vextracti128_high $tmp,$src2\n\t" 5651 "vpmullq $tmp2,$tmp,$src2\n\t" 5652 "pshufd $tmp,$tmp2,0xE\n\t" 5653 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5654 "movdq $tmp,$src1\n\t" 5655 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5656 "movdq $dst,$tmp2\t! mul reduction4L" %} 5657 ins_encode %{ 5658 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5659 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5660 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5661 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5662 __ movdq($tmp$$XMMRegister, $src1$$Register); 5663 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5664 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5665 %} 5666 ins_pipe( pipe_slow ); 5667 %} 5668 5669 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5670 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5671 match(Set dst (MulReductionVL src1 src2)); 5672 effect(TEMP tmp, TEMP tmp2); 5673 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5674 "vpmullq $tmp2,$tmp2,$src2\n\t" 5675 "vextracti128_high $tmp,$tmp2\n\t" 5676 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5677 "pshufd $tmp,$tmp2,0xE\n\t" 5678 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5679 "movdq $tmp,$src1\n\t" 5680 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5681 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5682 ins_encode %{ 5683 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5684 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5685 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5686 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5687 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5688 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5689 __ movdq($tmp$$XMMRegister, $src1$$Register); 5690 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5691 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5692 %} 5693 ins_pipe( pipe_slow ); 5694 %} 5695 #endif 5696 5697 instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{ 5698 predicate(UseSSE >= 1 && UseAVX == 0); 5699 match(Set dst (MulReductionVF dst src2)); 5700 effect(TEMP dst, TEMP tmp); 5701 format %{ "mulss $dst,$src2\n\t" 5702 "pshufd $tmp,$src2,0x01\n\t" 5703 "mulss $dst,$tmp\t! mul reduction2F" %} 5704 ins_encode %{ 5705 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5706 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5707 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5708 %} 5709 ins_pipe( pipe_slow ); 5710 %} 5711 5712 instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5713 predicate(UseAVX > 0); 5714 match(Set dst (MulReductionVF dst src2)); 5715 effect(TEMP tmp, TEMP dst); 5716 format %{ "vmulss $dst,$dst,$src2\n\t" 5717 "pshufd $tmp,$src2,0x01\n\t" 5718 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5719 ins_encode %{ 5720 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5721 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5722 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5723 %} 5724 ins_pipe( pipe_slow ); 5725 %} 5726 5727 instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5728 predicate(UseSSE >= 1 && UseAVX == 0); 5729 match(Set dst (MulReductionVF dst src2)); 5730 effect(TEMP dst, TEMP tmp); 5731 format %{ "mulss $dst,$src2\n\t" 5732 "pshufd $tmp,$src2,0x01\n\t" 5733 "mulss $dst,$tmp\n\t" 5734 "pshufd $tmp,$src2,0x02\n\t" 5735 "mulss $dst,$tmp\n\t" 5736 "pshufd $tmp,$src2,0x03\n\t" 5737 "mulss $dst,$tmp\t! mul reduction4F" %} 5738 ins_encode %{ 5739 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5740 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5741 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5742 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5743 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5744 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5745 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5746 %} 5747 ins_pipe( pipe_slow ); 5748 %} 5749 5750 instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5751 predicate(UseAVX > 0); 5752 match(Set dst (MulReductionVF dst src2)); 5753 effect(TEMP tmp, TEMP dst); 5754 format %{ "vmulss $dst,$dst,$src2\n\t" 5755 "pshufd $tmp,$src2,0x01\n\t" 5756 "vmulss $dst,$dst,$tmp\n\t" 5757 "pshufd $tmp,$src2,0x02\n\t" 5758 "vmulss $dst,$dst,$tmp\n\t" 5759 "pshufd $tmp,$src2,0x03\n\t" 5760 "vmulss $dst,$dst,$tmp\t! 
instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
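// For 512-bit sources, the 16F reduction below walks the four 128-bit lanes
// of the ZMM register with vextractf32x4 (lane index 0x1..0x3 in the
// immediate) and reduces each extracted lane exactly like the 4F case above.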
instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
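// pshufd's immediate is four 2-bit lane selectors: result dword i is taken
// from source dword ((imm >> (2*i)) & 3). Worked examples as used here:
//   0x01 = 0b00_00_00_01 -> {src[1], src[0], src[0], src[0]}  (float lane 1 down)
//   0xE  = 0b00_00_11_10 -> {src[2], src[3], src[0], src[0]}  (high qword down)
// so 0xE moves the upper 64 bits into the low qword for the 2D reductions below.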
instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  // Note: the second shuffle reads $tmp2 (the extracted lane), matching the
  // encoding below; the format previously said $src2 here by mistake.
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
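
// Each operation below comes in up to three shapes. The SSE form is
// destructive: it matches "Set dst (AddVB dst src)" and overwrites dst. The
// AVX register form is three-operand and non-destructive. The AVX memory form
// folds a vector load into the arithmetic op by matching the (LoadVector mem)
// subtree directly, e.g.:
//
//   match(Set dst (AddVB src (LoadVector mem)));   // one vpaddb xmm,xmm,[mem]
//
// The predicates select exactly one shape per platform: UseAVX == 0 picks the
// SSE form; UseAVX > 0/1/2 pick VEX/EVEX forms by the vector width required.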
// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
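// vector_len selects the encoded width: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.
// The 512-bit byte and short forms below additionally require
// VM_Version::supports_avx512bw(), because AVX-512F alone provides no packed
// byte/word arithmetic; the int/long/float/double 512-bit forms need only
// UseAVX > 2.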
instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
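// Memory variants are provided only for the AVX forms: VEX/EVEX encodings do
// not carry SSE's 16-byte alignment requirement on memory operands, so the
// matcher can fold an arbitrary LoadVector into the op; the legacy SSE forms
// keep the load as a separate instruction.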
// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
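
// Subtraction mirrors addition instruction-for-instruction, but it is not
// commutative: the register form computes $src1 - $src2 and the memory form
// computes $src - mem, so the matcher cannot swap operands as it may for add.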
// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------
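
// Packed multiplies keep only the low half of each lane product: pmullw
// retains the low 16 bits of every 16x16-bit product, which matches the
// truncating semantics the auto-vectorizer expects, roughly
//
//   short r = (short)(a * b);   // per lane
//
// Packed 32-bit multiply (pmulld) only arrived with SSE4.1, hence the
// UseSSE > 3 predicates below, and 64-bit lane multiply (vpmullq) requires
// AVX-512DQ.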
mul packed4S" %} 7330 ins_encode %{ 7331 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7332 %} 7333 ins_pipe( pipe_slow ); 7334 %} 7335 7336 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 7337 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7338 match(Set dst (MulVS src1 src2)); 7339 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7340 ins_encode %{ 7341 int vector_len = 0; 7342 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7343 %} 7344 ins_pipe( pipe_slow ); 7345 %} 7346 7347 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 7348 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7349 match(Set dst (MulVS src (LoadVector mem))); 7350 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7351 ins_encode %{ 7352 int vector_len = 0; 7353 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7354 %} 7355 ins_pipe( pipe_slow ); 7356 %} 7357 7358 instruct vmul8S(vecX dst, vecX src) %{ 7359 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7360 match(Set dst (MulVS dst src)); 7361 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7362 ins_encode %{ 7363 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7364 %} 7365 ins_pipe( pipe_slow ); 7366 %} 7367 7368 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 7369 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7370 match(Set dst (MulVS src1 src2)); 7371 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7372 ins_encode %{ 7373 int vector_len = 0; 7374 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7375 %} 7376 ins_pipe( pipe_slow ); 7377 %} 7378 7379 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 7380 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7381 match(Set dst (MulVS src (LoadVector mem))); 7382 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7383 ins_encode %{ 7384 int vector_len = 0; 7385 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7386 %} 7387 ins_pipe( pipe_slow ); 7388 %} 7389 7390 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 7391 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7392 match(Set dst (MulVS src1 src2)); 7393 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7394 ins_encode %{ 7395 int vector_len = 1; 7396 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7397 %} 7398 ins_pipe( pipe_slow ); 7399 %} 7400 7401 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 7402 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7403 match(Set dst (MulVS src (LoadVector mem))); 7404 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7405 ins_encode %{ 7406 int vector_len = 1; 7407 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7408 %} 7409 ins_pipe( pipe_slow ); 7410 %} 7411 7412 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7413 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7414 match(Set dst (MulVS src1 src2)); 7415 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed32S" %} 7416 ins_encode %{ 7417 int vector_len = 2; 7418 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7419 %} 7420 ins_pipe( pipe_slow ); 7421 %} 7422 7423 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7424 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7425 match(Set dst (MulVS src (LoadVector mem))); 7426 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7427 ins_encode %{ 7428 int vector_len = 2; 7429 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7430 %} 7431 ins_pipe( pipe_slow ); 7432 %} 7433 7434 // Integers vector mul (sse4_1) 7435 instruct vmul2I(vecD dst, vecD src) %{ 7436 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7437 match(Set dst (MulVI dst src)); 7438 format %{ "pmulld $dst,$src\t! mul packed2I" %} 7439 ins_encode %{ 7440 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7441 %} 7442 ins_pipe( pipe_slow ); 7443 %} 7444 7445 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 7446 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7447 match(Set dst (MulVI src1 src2)); 7448 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 7449 ins_encode %{ 7450 int vector_len = 0; 7451 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7452 %} 7453 ins_pipe( pipe_slow ); 7454 %} 7455 7456 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 7457 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7458 match(Set dst (MulVI src (LoadVector mem))); 7459 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 7460 ins_encode %{ 7461 int vector_len = 0; 7462 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7463 %} 7464 ins_pipe( pipe_slow ); 7465 %} 7466 7467 instruct vmul4I(vecX dst, vecX src) %{ 7468 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7469 match(Set dst (MulVI dst src)); 7470 format %{ "pmulld $dst,$src\t! mul packed4I" %} 7471 ins_encode %{ 7472 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7473 %} 7474 ins_pipe( pipe_slow ); 7475 %} 7476 7477 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 7478 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7479 match(Set dst (MulVI src1 src2)); 7480 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 7481 ins_encode %{ 7482 int vector_len = 0; 7483 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7484 %} 7485 ins_pipe( pipe_slow ); 7486 %} 7487 7488 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 7489 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7490 match(Set dst (MulVI src (LoadVector mem))); 7491 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 7492 ins_encode %{ 7493 int vector_len = 0; 7494 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7495 %} 7496 ins_pipe( pipe_slow ); 7497 %} 7498 7499 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 7500 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7501 match(Set dst (MulVL src1 src2)); 7502 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 7503 ins_encode %{ 7504 int vector_len = 0; 7505 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7506 %} 7507 ins_pipe( pipe_slow ); 7508 %} 7509 7510 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 7511 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7512 match(Set dst (MulVL src (LoadVector mem))); 7513 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 7514 ins_encode %{ 7515 int vector_len = 0; 7516 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7517 %} 7518 ins_pipe( pipe_slow ); 7519 %} 7520 7521 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 7522 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7523 match(Set dst (MulVL src1 src2)); 7524 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 7525 ins_encode %{ 7526 int vector_len = 1; 7527 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7528 %} 7529 ins_pipe( pipe_slow ); 7530 %} 7531 7532 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7533 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7534 match(Set dst (MulVL src (LoadVector mem))); 7535 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7536 ins_encode %{ 7537 int vector_len = 1; 7538 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7539 %} 7540 ins_pipe( pipe_slow ); 7541 %} 7542 7543 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7544 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7545 match(Set dst (MulVL src1 src2)); 7546 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 7547 ins_encode %{ 7548 int vector_len = 2; 7549 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7550 %} 7551 ins_pipe( pipe_slow ); 7552 %} 7553 7554 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7555 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7556 match(Set dst (MulVL src (LoadVector mem))); 7557 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7558 ins_encode %{ 7559 int vector_len = 2; 7560 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7561 %} 7562 ins_pipe( pipe_slow ); 7563 %} 7564 7565 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7566 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7567 match(Set dst (MulVI src1 src2)); 7568 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7569 ins_encode %{ 7570 int vector_len = 1; 7571 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7572 %} 7573 ins_pipe( pipe_slow ); 7574 %} 7575 7576 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7577 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7578 match(Set dst (MulVI src (LoadVector mem))); 7579 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7580 ins_encode %{ 7581 int vector_len = 1; 7582 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7583 %} 7584 ins_pipe( pipe_slow ); 7585 %} 7586 7587 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7588 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7589 match(Set dst (MulVI src1 src2)); 7590 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 7591 ins_encode %{ 7592 int vector_len = 2; 7593 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7594 %} 7595 ins_pipe( pipe_slow ); 7596 %} 7597 7598 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7599 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7600 match(Set dst (MulVI src (LoadVector mem))); 7601 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7602 ins_encode %{ 7603 int vector_len = 2; 7604 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7605 %} 7606 ins_pipe( pipe_slow ); 7607 %} 7608 7609 // Floats vector mul 7610 instruct vmul2F(vecD dst, vecD src) %{ 7611 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7612 match(Set dst (MulVF dst src)); 7613 format %{ "mulps $dst,$src\t! mul packed2F" %} 7614 ins_encode %{ 7615 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7616 %} 7617 ins_pipe( pipe_slow ); 7618 %} 7619 7620 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7621 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7622 match(Set dst (MulVF src1 src2)); 7623 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7624 ins_encode %{ 7625 int vector_len = 0; 7626 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7627 %} 7628 ins_pipe( pipe_slow ); 7629 %} 7630 7631 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7632 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7633 match(Set dst (MulVF src (LoadVector mem))); 7634 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7635 ins_encode %{ 7636 int vector_len = 0; 7637 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7638 %} 7639 ins_pipe( pipe_slow ); 7640 %} 7641 7642 instruct vmul4F(vecX dst, vecX src) %{ 7643 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7644 match(Set dst (MulVF dst src)); 7645 format %{ "mulps $dst,$src\t! mul packed4F" %} 7646 ins_encode %{ 7647 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7648 %} 7649 ins_pipe( pipe_slow ); 7650 %} 7651 7652 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7653 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7654 match(Set dst (MulVF src1 src2)); 7655 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7656 ins_encode %{ 7657 int vector_len = 0; 7658 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7659 %} 7660 ins_pipe( pipe_slow ); 7661 %} 7662 7663 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 7664 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7665 match(Set dst (MulVF src (LoadVector mem))); 7666 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 7667 ins_encode %{ 7668 int vector_len = 0; 7669 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7670 %} 7671 ins_pipe( pipe_slow ); 7672 %} 7673 7674 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 7675 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7676 match(Set dst (MulVF src1 src2)); 7677 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 7678 ins_encode %{ 7679 int vector_len = 1; 7680 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7681 %} 7682 ins_pipe( pipe_slow ); 7683 %} 7684 7685 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 7686 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7687 match(Set dst (MulVF src (LoadVector mem))); 7688 format %{ "vmulps $dst,$src,$mem\t! 
mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
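// Vector conditional move. The two instructs below implement CMoveVF/CMoveVD
// as a compare-and-blend pair: vcmpps/vcmppd fills each lane of $dst with an
// all-ones or all-zeros mask, and vblendvps/vblendvpd then selects each lane
// from $src1 or $src2 under that mask, so no branch is ever taken. As an
// illustrative sketch only (one loop shape that can give rise to a vector
// CMove when auto-vectorized; not the only one):
//
//   for (int i = 0; i < n; i++) {
//     r[i] = (a[i] < b[i]) ? a[i] : b[i];   // per-lane select, branch-free
//   }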
instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
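// The 512-bit (vecZ) forms below need EVEX encodings, hence the UseAVX > 2
// (AVX-512) guard: plain AVX/AVX2 tops out at 256-bit vdivps/vdivpd.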
instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
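// Illustrative sketch (hypothetical Java, assuming the loop is superword-
// vectorized): when every element shifts by the same variable count, that
// count is materialized once through LShiftCntV/RShiftCntV (the movdl
// above) and then feeds the packed shift instructions that follow.
//
//   static void shiftAll(int[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] <<= s;     // one movd of s, then packed shifts on whole vectors
//     }
//   }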
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t!
sqrt packed8F" %} 8157 ins_encode %{ 8158 int vector_len = 1; 8159 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8160 %} 8161 ins_pipe( pipe_slow ); 8162 %} 8163 8164 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 8165 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8166 match(Set dst (SqrtVF src)); 8167 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8168 ins_encode %{ 8169 int vector_len = 2; 8170 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8171 %} 8172 ins_pipe( pipe_slow ); 8173 %} 8174 8175 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8176 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8177 match(Set dst (SqrtVF (LoadVector mem))); 8178 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8179 ins_encode %{ 8180 int vector_len = 2; 8181 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8182 %} 8183 ins_pipe( pipe_slow ); 8184 %} 8185 8186 // ------------------------------ LeftShift ----------------------------------- 8187 8188 // Shorts/Chars vector left shift 8189 instruct vsll2S(vecS dst, vecS shift) %{ 8190 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8191 match(Set dst (LShiftVS dst shift)); 8192 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8193 ins_encode %{ 8194 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8195 %} 8196 ins_pipe( pipe_slow ); 8197 %} 8198 8199 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8200 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8201 match(Set dst (LShiftVS dst shift)); 8202 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8203 ins_encode %{ 8204 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8205 %} 8206 ins_pipe( pipe_slow ); 8207 %} 8208 8209 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 8210 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8211 match(Set dst (LShiftVS src shift)); 8212 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8213 ins_encode %{ 8214 int vector_len = 0; 8215 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8216 %} 8217 ins_pipe( pipe_slow ); 8218 %} 8219 8220 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8221 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8222 match(Set dst (LShiftVS src shift)); 8223 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8224 ins_encode %{ 8225 int vector_len = 0; 8226 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8227 %} 8228 ins_pipe( pipe_slow ); 8229 %} 8230 8231 instruct vsll4S(vecD dst, vecS shift) %{ 8232 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8233 match(Set dst (LShiftVS dst shift)); 8234 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8235 ins_encode %{ 8236 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8237 %} 8238 ins_pipe( pipe_slow ); 8239 %} 8240 8241 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8242 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8243 match(Set dst (LShiftVS dst shift)); 8244 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8245 ins_encode %{ 8246 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8247 %} 8248 ins_pipe( pipe_slow ); 8249 %} 8250 8251 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 8252 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8253 match(Set dst (LShiftVS src shift)); 8254 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 8255 ins_encode %{ 8256 int vector_len = 0; 8257 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8258 %} 8259 ins_pipe( pipe_slow ); 8260 %} 8261 8262 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8263 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8264 match(Set dst (LShiftVS src shift)); 8265 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8266 ins_encode %{ 8267 int vector_len = 0; 8268 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8269 %} 8270 ins_pipe( pipe_slow ); 8271 %} 8272 8273 instruct vsll8S(vecX dst, vecS shift) %{ 8274 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8275 match(Set dst (LShiftVS dst shift)); 8276 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8277 ins_encode %{ 8278 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8279 %} 8280 ins_pipe( pipe_slow ); 8281 %} 8282 8283 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 8284 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8285 match(Set dst (LShiftVS dst shift)); 8286 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8287 ins_encode %{ 8288 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8289 %} 8290 ins_pipe( pipe_slow ); 8291 %} 8292 8293 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 8294 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8295 match(Set dst (LShiftVS src shift)); 8296 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8297 ins_encode %{ 8298 int vector_len = 0; 8299 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8300 %} 8301 ins_pipe( pipe_slow ); 8302 %} 8303 8304 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8305 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8306 match(Set dst (LShiftVS src shift)); 8307 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8308 ins_encode %{ 8309 int vector_len = 0; 8310 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8311 %} 8312 ins_pipe( pipe_slow ); 8313 %} 8314 8315 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 8316 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8317 match(Set dst (LShiftVS src shift)); 8318 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8319 ins_encode %{ 8320 int vector_len = 1; 8321 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8322 %} 8323 ins_pipe( pipe_slow ); 8324 %} 8325 8326 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8327 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8328 match(Set dst (LShiftVS src shift)); 8329 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8330 ins_encode %{ 8331 int vector_len = 1; 8332 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8333 %} 8334 ins_pipe( pipe_slow ); 8335 %} 8336 8337 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8338 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8339 match(Set dst (LShiftVS src shift)); 8340 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed32S" %} 8341 ins_encode %{ 8342 int vector_len = 2; 8343 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8344 %} 8345 ins_pipe( pipe_slow ); 8346 %} 8347 8348 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8349 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8350 match(Set dst (LShiftVS src shift)); 8351 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 8352 ins_encode %{ 8353 int vector_len = 2; 8354 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8355 %} 8356 ins_pipe( pipe_slow ); 8357 %} 8358 8359 // Integers vector left shift 8360 instruct vsll2I(vecD dst, vecS shift) %{ 8361 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8362 match(Set dst (LShiftVI dst shift)); 8363 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8364 ins_encode %{ 8365 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8366 %} 8367 ins_pipe( pipe_slow ); 8368 %} 8369 8370 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 8371 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8372 match(Set dst (LShiftVI dst shift)); 8373 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8374 ins_encode %{ 8375 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8376 %} 8377 ins_pipe( pipe_slow ); 8378 %} 8379 8380 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 8381 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8382 match(Set dst (LShiftVI src shift)); 8383 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8384 ins_encode %{ 8385 int vector_len = 0; 8386 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8387 %} 8388 ins_pipe( pipe_slow ); 8389 %} 8390 8391 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8392 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8393 match(Set dst (LShiftVI src shift)); 8394 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8395 ins_encode %{ 8396 int vector_len = 0; 8397 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8398 %} 8399 ins_pipe( pipe_slow ); 8400 %} 8401 8402 instruct vsll4I(vecX dst, vecS shift) %{ 8403 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8404 match(Set dst (LShiftVI dst shift)); 8405 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8406 ins_encode %{ 8407 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8408 %} 8409 ins_pipe( pipe_slow ); 8410 %} 8411 8412 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 8413 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8414 match(Set dst (LShiftVI dst shift)); 8415 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8416 ins_encode %{ 8417 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8418 %} 8419 ins_pipe( pipe_slow ); 8420 %} 8421 8422 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 8423 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8424 match(Set dst (LShiftVI src shift)); 8425 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 8426 ins_encode %{ 8427 int vector_len = 0; 8428 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8429 %} 8430 ins_pipe( pipe_slow ); 8431 %} 8432 8433 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8434 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8435 match(Set dst (LShiftVI src shift)); 8436 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 8437 ins_encode %{ 8438 int vector_len = 0; 8439 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8440 %} 8441 ins_pipe( pipe_slow ); 8442 %} 8443 8444 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 8445 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8446 match(Set dst (LShiftVI src shift)); 8447 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 8448 ins_encode %{ 8449 int vector_len = 1; 8450 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8451 %} 8452 ins_pipe( pipe_slow ); 8453 %} 8454 8455 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8456 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8457 match(Set dst (LShiftVI src shift)); 8458 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 8459 ins_encode %{ 8460 int vector_len = 1; 8461 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8462 %} 8463 ins_pipe( pipe_slow ); 8464 %} 8465 8466 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8467 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8468 match(Set dst (LShiftVI src shift)); 8469 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 8470 ins_encode %{ 8471 int vector_len = 2; 8472 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8473 %} 8474 ins_pipe( pipe_slow ); 8475 %} 8476 8477 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8478 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8479 match(Set dst (LShiftVI src shift)); 8480 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 8481 ins_encode %{ 8482 int vector_len = 2; 8483 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8484 %} 8485 ins_pipe( pipe_slow ); 8486 %} 8487 8488 // Longs vector left shift 8489 instruct vsll2L(vecX dst, vecS shift) %{ 8490 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8491 match(Set dst (LShiftVL dst shift)); 8492 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 8493 ins_encode %{ 8494 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 8495 %} 8496 ins_pipe( pipe_slow ); 8497 %} 8498 8499 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 8500 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8501 match(Set dst (LShiftVL dst shift)); 8502 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 8503 ins_encode %{ 8504 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 8505 %} 8506 ins_pipe( pipe_slow ); 8507 %} 8508 8509 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 8510 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8511 match(Set dst (LShiftVL src shift)); 8512 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 8513 ins_encode %{ 8514 int vector_len = 0; 8515 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8516 %} 8517 ins_pipe( pipe_slow ); 8518 %} 8519 8520 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8521 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8522 match(Set dst (LShiftVL src shift)); 8523 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before the shift. But char vectors are fine, since chars
// are unsigned values.
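// Illustrative sketch of the promotion issue (hypothetical Java):
//
//   short s = -4;                   // 0xFFFC
//   short r = (short)(s >>> 3);     // s promotes to int 0xFFFFFFFC with sign
//                                   // extension, so r = 0xFFFF, while a
//                                   // packed 16-bit psrlw would give 0x1FFF
//
//   char c = 0xFFFC;
//   char d = (char)(c >>> 3);       // zero-extended promotion: d = 0x1FFF,
//                                   // which matches the packed result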
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t!
logical right shift packed16S" %} 8712 ins_encode %{ 8713 int vector_len = 1; 8714 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8715 %} 8716 ins_pipe( pipe_slow ); 8717 %} 8718 8719 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8720 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8721 match(Set dst (URShiftVS src shift)); 8722 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 8723 ins_encode %{ 8724 int vector_len = 1; 8725 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8726 %} 8727 ins_pipe( pipe_slow ); 8728 %} 8729 8730 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8731 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8732 match(Set dst (URShiftVS src shift)); 8733 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8734 ins_encode %{ 8735 int vector_len = 2; 8736 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8737 %} 8738 ins_pipe( pipe_slow ); 8739 %} 8740 8741 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8742 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8743 match(Set dst (URShiftVS src shift)); 8744 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8745 ins_encode %{ 8746 int vector_len = 2; 8747 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8748 %} 8749 ins_pipe( pipe_slow ); 8750 %} 8751 8752 // Integers vector logical right shift 8753 instruct vsrl2I(vecD dst, vecS shift) %{ 8754 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8755 match(Set dst (URShiftVI dst shift)); 8756 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8757 ins_encode %{ 8758 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8759 %} 8760 ins_pipe( pipe_slow ); 8761 %} 8762 8763 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 8764 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8765 match(Set dst (URShiftVI dst shift)); 8766 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8767 ins_encode %{ 8768 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8769 %} 8770 ins_pipe( pipe_slow ); 8771 %} 8772 8773 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 8774 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8775 match(Set dst (URShiftVI src shift)); 8776 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8777 ins_encode %{ 8778 int vector_len = 0; 8779 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8780 %} 8781 ins_pipe( pipe_slow ); 8782 %} 8783 8784 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8785 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8786 match(Set dst (URShiftVI src shift)); 8787 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8788 ins_encode %{ 8789 int vector_len = 0; 8790 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8791 %} 8792 ins_pipe( pipe_slow ); 8793 %} 8794 8795 instruct vsrl4I(vecX dst, vecS shift) %{ 8796 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8797 match(Set dst (URShiftVI dst shift)); 8798 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 8799 ins_encode %{ 8800 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8801 %} 8802 ins_pipe( pipe_slow ); 8803 %} 8804 8805 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 8806 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8807 match(Set dst (URShiftVI dst shift)); 8808 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 8809 ins_encode %{ 8810 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8811 %} 8812 ins_pipe( pipe_slow ); 8813 %} 8814 8815 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 8816 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8817 match(Set dst (URShiftVI src shift)); 8818 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 8819 ins_encode %{ 8820 int vector_len = 0; 8821 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8822 %} 8823 ins_pipe( pipe_slow ); 8824 %} 8825 8826 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8827 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8828 match(Set dst (URShiftVI src shift)); 8829 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 8830 ins_encode %{ 8831 int vector_len = 0; 8832 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8833 %} 8834 ins_pipe( pipe_slow ); 8835 %} 8836 8837 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 8838 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8839 match(Set dst (URShiftVI src shift)); 8840 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8841 ins_encode %{ 8842 int vector_len = 1; 8843 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8844 %} 8845 ins_pipe( pipe_slow ); 8846 %} 8847 8848 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8849 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8850 match(Set dst (URShiftVI src shift)); 8851 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8852 ins_encode %{ 8853 int vector_len = 1; 8854 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8855 %} 8856 ins_pipe( pipe_slow ); 8857 %} 8858 8859 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8860 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8861 match(Set dst (URShiftVI src shift)); 8862 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8863 ins_encode %{ 8864 int vector_len = 2; 8865 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8866 %} 8867 ins_pipe( pipe_slow ); 8868 %} 8869 8870 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8871 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8872 match(Set dst (URShiftVI src shift)); 8873 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8874 ins_encode %{ 8875 int vector_len = 2; 8876 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8877 %} 8878 ins_pipe( pipe_slow ); 8879 %} 8880 8881 // Longs vector logical right shift 8882 instruct vsrl2L(vecX dst, vecS shift) %{ 8883 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8884 match(Set dst (URShiftVL dst shift)); 8885 format %{ "psrlq $dst,$shift\t! 
logical right shift packed2L" %} 8886 ins_encode %{ 8887 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 8888 %} 8889 ins_pipe( pipe_slow ); 8890 %} 8891 8892 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 8893 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8894 match(Set dst (URShiftVL dst shift)); 8895 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 8896 ins_encode %{ 8897 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 8898 %} 8899 ins_pipe( pipe_slow ); 8900 %} 8901 8902 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 8903 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8904 match(Set dst (URShiftVL src shift)); 8905 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 8906 ins_encode %{ 8907 int vector_len = 0; 8908 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8909 %} 8910 ins_pipe( pipe_slow ); 8911 %} 8912 8913 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8914 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8915 match(Set dst (URShiftVL src shift)); 8916 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 8917 ins_encode %{ 8918 int vector_len = 0; 8919 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8920 %} 8921 ins_pipe( pipe_slow ); 8922 %} 8923 8924 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 8925 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8926 match(Set dst (URShiftVL src shift)); 8927 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 8928 ins_encode %{ 8929 int vector_len = 1; 8930 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8931 %} 8932 ins_pipe( pipe_slow ); 8933 %} 8934 8935 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8936 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8937 match(Set dst (URShiftVL src shift)); 8938 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 8939 ins_encode %{ 8940 int vector_len = 1; 8941 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8942 %} 8943 ins_pipe( pipe_slow ); 8944 %} 8945 8946 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 8947 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8948 match(Set dst (URShiftVL src shift)); 8949 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8950 ins_encode %{ 8951 int vector_len = 2; 8952 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8953 %} 8954 ins_pipe( pipe_slow ); 8955 %} 8956 8957 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8958 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8959 match(Set dst (URShiftVL src shift)); 8960 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 8961 ins_encode %{ 8962 int vector_len = 2; 8963 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8964 %} 8965 ins_pipe( pipe_slow ); 8966 %} 8967
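// Example: the URShiftVS/URShiftVI/URShiftVL rules above (and the psrl*/vpsrl*
// encodings they emit) are matched after C2's SuperWord pass packs the scalar
// unsigned shifts of a counted loop with a loop-invariant shift count. A
// minimal Java sketch of such a loop (whether it actually vectorizes depends
// on flags such as UseAVX and on the CPU):
//
//   static void shiftAll(int[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = a[i] >>> s;   // scalar URShiftI, packed into URShiftVI
//     }
//   }
//
// With UseAVX == 0 the two-operand psrl* forms apply and $dst doubles as the
// first input; with AVX the three-operand vpsrl* forms apply, and a constant
// shift count selects the *_imm variants.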
8968 // ------------------- ArithmeticRightShift ----------------------------------- 8969 8970 // Shorts/Chars vector arithmetic right shift 8971 instruct vsra2S(vecS dst, vecS shift) %{ 8972 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8973 match(Set dst (RShiftVS dst shift)); 8974 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 8975 ins_encode %{ 8976 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 8977 %} 8978 ins_pipe( pipe_slow ); 8979 %} 8980 8981 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 8982 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8983 match(Set dst (RShiftVS dst shift)); 8984 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 8985 ins_encode %{ 8986 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 8987 %} 8988 ins_pipe( pipe_slow ); 8989 %} 8990 8991 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 8992 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8993 match(Set dst (RShiftVS src shift)); 8994 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 8995 ins_encode %{ 8996 int vector_len = 0; 8997 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8998 %} 8999 ins_pipe( pipe_slow ); 9000 %} 9001 9002 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 9003 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9004 match(Set dst (RShiftVS src shift)); 9005 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9006 ins_encode %{ 9007 int vector_len = 0; 9008 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9009 %} 9010 ins_pipe( pipe_slow ); 9011 %} 9012 9013 instruct vsra4S(vecD dst, vecS shift) %{ 9014 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9015 match(Set dst (RShiftVS dst shift)); 9016 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9017 ins_encode %{ 9018 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9019 %} 9020 ins_pipe( pipe_slow ); 9021 %} 9022 9023 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 9024 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9025 match(Set dst (RShiftVS dst shift)); 9026 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9027 ins_encode %{ 9028 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9029 %} 9030 ins_pipe( pipe_slow ); 9031 %} 9032 9033 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 9034 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9035 match(Set dst (RShiftVS src shift)); 9036 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9037 ins_encode %{ 9038 int vector_len = 0; 9039 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9040 %} 9041 ins_pipe( pipe_slow ); 9042 %} 9043 9044 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9045 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9046 match(Set dst (RShiftVS src shift)); 9047 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9048 ins_encode %{ 9049 int vector_len = 0; 9050 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9051 %} 9052 ins_pipe( pipe_slow ); 9053 %} 9054 9055 instruct vsra8S(vecX dst, vecS shift) %{ 9056 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9057 match(Set dst (RShiftVS dst shift)); 9058 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 9059 ins_encode %{ 9060 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9061 %} 9062 ins_pipe( pipe_slow ); 9063 %} 9064 9065 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 9066 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9067 match(Set dst (RShiftVS dst shift)); 9068 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %} 9069 ins_encode %{ 9070 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9071 %} 9072 ins_pipe( pipe_slow ); 9073 %} 9074 9075 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 9076 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9077 match(Set dst (RShiftVS src shift)); 9078 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9079 ins_encode %{ 9080 int vector_len = 0; 9081 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9082 %} 9083 ins_pipe( pipe_slow ); 9084 %} 9085 9086 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9087 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9088 match(Set dst (RShiftVS src shift)); 9089 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9090 ins_encode %{ 9091 int vector_len = 0; 9092 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9093 %} 9094 ins_pipe( pipe_slow ); 9095 %} 9096 9097 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 9098 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 9099 match(Set dst (RShiftVS src shift)); 9100 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 9101 ins_encode %{ 9102 int vector_len = 1; 9103 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9104 %} 9105 ins_pipe( pipe_slow ); 9106 %} 9107 9108 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9109 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 9110 match(Set dst (RShiftVS src shift)); 9111 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 9112 ins_encode %{ 9113 int vector_len = 1; 9114 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9115 %} 9116 ins_pipe( pipe_slow ); 9117 %} 9118 9119 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9120 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9121 match(Set dst (RShiftVS src shift)); 9122 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 9123 ins_encode %{ 9124 int vector_len = 2; 9125 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9126 %} 9127 ins_pipe( pipe_slow ); 9128 %} 9129 9130 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9131 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9132 match(Set dst (RShiftVS src shift)); 9133 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 9134 ins_encode %{ 9135 int vector_len = 2; 9136 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9137 %} 9138 ins_pipe( pipe_slow ); 9139 %} 9140 9141 // Integers vector arithmetic right shift 9142 instruct vsra2I(vecD dst, vecS shift) %{ 9143 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9144 match(Set dst (RShiftVI dst shift)); 9145 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 9146 ins_encode %{ 9147 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 9148 %} 9149 ins_pipe( pipe_slow ); 9150 %} 9151 9152 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 9153 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9154 match(Set dst (RShiftVI dst shift)); 9155 format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed2I" %} 9156 ins_encode %{ 9157 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 9158 %} 9159 ins_pipe( pipe_slow ); 9160 %} 9161 9162 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 9163 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9164 match(Set dst (RShiftVI src shift)); 9165 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 9166 ins_encode %{ 9167 int vector_len = 0; 9168 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9169 %} 9170 ins_pipe( pipe_slow ); 9171 %} 9172 9173 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9174 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9175 match(Set dst (RShiftVI src shift)); 9176 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 9177 ins_encode %{ 9178 int vector_len = 0; 9179 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9180 %} 9181 ins_pipe( pipe_slow ); 9182 %} 9183 9184 instruct vsra4I(vecX dst, vecS shift) %{ 9185 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9186 match(Set dst (RShiftVI dst shift)); 9187 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 9188 ins_encode %{ 9189 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 9190 %} 9191 ins_pipe( pipe_slow ); 9192 %} 9193 9194 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 9195 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9196 match(Set dst (RShiftVI dst shift)); 9197 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 9198 ins_encode %{ 9199 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 9200 %} 9201 ins_pipe( pipe_slow ); 9202 %} 9203 9204 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 9205 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9206 match(Set dst (RShiftVI src shift)); 9207 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 9208 ins_encode %{ 9209 int vector_len = 0; 9210 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9211 %} 9212 ins_pipe( pipe_slow ); 9213 %} 9214 9215 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9216 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9217 match(Set dst (RShiftVI src shift)); 9218 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 9219 ins_encode %{ 9220 int vector_len = 0; 9221 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9222 %} 9223 ins_pipe( pipe_slow ); 9224 %} 9225 9226 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 9227 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9228 match(Set dst (RShiftVI src shift)); 9229 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 9230 ins_encode %{ 9231 int vector_len = 1; 9232 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9233 %} 9234 ins_pipe( pipe_slow ); 9235 %} 9236 9237 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9238 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9239 match(Set dst (RShiftVI src shift)); 9240 format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed8I" %} 9241 ins_encode %{ 9242 int vector_len = 1; 9243 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9244 %} 9245 ins_pipe( pipe_slow ); 9246 %} 9247 9248 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9249 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9250 match(Set dst (RShiftVI src shift)); 9251 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 9252 ins_encode %{ 9253 int vector_len = 2; 9254 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9255 %} 9256 ins_pipe( pipe_slow ); 9257 %} 9258 9259 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9260 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9261 match(Set dst (RShiftVI src shift)); 9262 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 9263 ins_encode %{ 9264 int vector_len = 2; 9265 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9266 %} 9267 ins_pipe( pipe_slow ); 9268 %} 9269 9270 // There are no vector arithmetic right shift instructions for longs: x86 has no psraq, and vpsraq only arrives with AVX-512. 9271 9272
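// Example: Java's signed >> becomes the RShiftV* nodes matched above, while
// unsigned >>> becomes the URShiftV* nodes of the previous sections; the two
// differ only in sign extension. A minimal sketch of a loop SuperWord may
// pack this way (subject to flags and CPU features):
//
//   static void shiftAllSigned(int[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = a[i] >> s;   // scalar RShiftI, packed into RShiftVI (psrad/vpsrad)
//     }
//   }
//
// The same loop over a long[] is left scalar by these rules, as the comment
// above notes.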
9273 // --------------------------------- AND -------------------------------------- 9274 9275 instruct vand4B(vecS dst, vecS src) %{ 9276 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4); 9277 match(Set dst (AndV dst src)); 9278 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 9279 ins_encode %{ 9280 __ pand($dst$$XMMRegister, $src$$XMMRegister); 9281 %} 9282 ins_pipe( pipe_slow ); 9283 %} 9284 9285 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 9286 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 9287 match(Set dst (AndV src1 src2)); 9288 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 9289 ins_encode %{ 9290 int vector_len = 0; 9291 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9292 %} 9293 ins_pipe( pipe_slow ); 9294 %} 9295 9296 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 9297 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 9298 match(Set dst (AndV src (LoadVector mem))); 9299 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 9300 ins_encode %{ 9301 int vector_len = 0; 9302 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9303 %} 9304 ins_pipe( pipe_slow ); 9305 %} 9306 9307 instruct vand8B(vecD dst, vecD src) %{ 9308 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8); 9309 match(Set dst (AndV dst src)); 9310 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 9311 ins_encode %{ 9312 __ pand($dst$$XMMRegister, $src$$XMMRegister); 9313 %} 9314 ins_pipe( pipe_slow ); 9315 %} 9316 9317 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 9318 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 9319 match(Set dst (AndV src1 src2)); 9320 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 9321 ins_encode %{ 9322 int vector_len = 0; 9323 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9324 %} 9325 ins_pipe( pipe_slow ); 9326 %} 9327 9328 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 9329 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 9330 match(Set dst (AndV src (LoadVector mem))); 9331 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 9332 ins_encode %{ 9333 int vector_len = 0; 9334 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9335 %} 9336 ins_pipe( pipe_slow ); 9337 %} 9338 9339 instruct vand16B(vecX dst, vecX src) %{ 9340 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16); 9341 match(Set dst (AndV dst src)); 9342 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 9343 ins_encode %{ 9344 __ pand($dst$$XMMRegister, $src$$XMMRegister); 9345 %} 9346 ins_pipe( pipe_slow ); 9347 %} 9348 9349 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 9350 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9351 match(Set dst (AndV src1 src2)); 9352 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 9353 ins_encode %{ 9354 int vector_len = 0; 9355 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9356 %} 9357 ins_pipe( pipe_slow ); 9358 %} 9359 9360 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 9361 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9362 match(Set dst (AndV src (LoadVector mem))); 9363 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 9364 ins_encode %{ 9365 int vector_len = 0; 9366 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9367 %} 9368 ins_pipe( pipe_slow ); 9369 %} 9370 9371 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 9372 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9373 match(Set dst (AndV src1 src2)); 9374 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 9375 ins_encode %{ 9376 int vector_len = 1; 9377 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9378 %} 9379 ins_pipe( pipe_slow ); 9380 %} 9381 9382 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 9383 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9384 match(Set dst (AndV src (LoadVector mem))); 9385 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 9386 ins_encode %{ 9387 int vector_len = 1; 9388 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9389 %} 9390 ins_pipe( pipe_slow ); 9391 %} 9392 9393 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9394 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9395 match(Set dst (AndV src1 src2)); 9396 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} 9397 ins_encode %{ 9398 int vector_len = 2; 9399 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9400 %} 9401 ins_pipe( pipe_slow ); 9402 %} 9403 9404 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ 9405 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9406 match(Set dst (AndV src (LoadVector mem))); 9407 format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %} 9408 ins_encode %{ 9409 int vector_len = 2; 9410 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9411 %} 9412 ins_pipe( pipe_slow ); 9413 %} 9414 9415 // --------------------------------- OR --------------------------------------- 9416 9417 instruct vor4B(vecS dst, vecS src) %{ 9418 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4); 9419 match(Set dst (OrV dst src)); 9420 format %{ "por $dst,$src\t! 
or vectors (4 bytes)" %} 9421 ins_encode %{ 9422 __ por($dst$$XMMRegister, $src$$XMMRegister); 9423 %} 9424 ins_pipe( pipe_slow ); 9425 %} 9426 9427 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 9428 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 9429 match(Set dst (OrV src1 src2)); 9430 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 9431 ins_encode %{ 9432 int vector_len = 0; 9433 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9434 %} 9435 ins_pipe( pipe_slow ); 9436 %} 9437 9438 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 9439 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 9440 match(Set dst (OrV src (LoadVector mem))); 9441 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 9442 ins_encode %{ 9443 int vector_len = 0; 9444 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9445 %} 9446 ins_pipe( pipe_slow ); 9447 %} 9448 9449 instruct vor8B(vecD dst, vecD src) %{ 9450 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8); 9451 match(Set dst (OrV dst src)); 9452 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 9453 ins_encode %{ 9454 __ por($dst$$XMMRegister, $src$$XMMRegister); 9455 %} 9456 ins_pipe( pipe_slow ); 9457 %} 9458 9459 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 9460 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 9461 match(Set dst (OrV src1 src2)); 9462 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} 9463 ins_encode %{ 9464 int vector_len = 0; 9465 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9466 %} 9467 ins_pipe( pipe_slow ); 9468 %} 9469 9470 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ 9471 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 9472 match(Set dst (OrV src (LoadVector mem))); 9473 format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} 9474 ins_encode %{ 9475 int vector_len = 0; 9476 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9477 %} 9478 ins_pipe( pipe_slow ); 9479 %} 9480 9481 instruct vor16B(vecX dst, vecX src) %{ 9482 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16); 9483 match(Set dst (OrV dst src)); 9484 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 9485 ins_encode %{ 9486 __ por($dst$$XMMRegister, $src$$XMMRegister); 9487 %} 9488 ins_pipe( pipe_slow ); 9489 %} 9490 9491 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 9492 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9493 match(Set dst (OrV src1 src2)); 9494 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 9495 ins_encode %{ 9496 int vector_len = 0; 9497 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9498 %} 9499 ins_pipe( pipe_slow ); 9500 %} 9501 9502 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 9503 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9504 match(Set dst (OrV src (LoadVector mem))); 9505 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 9506 ins_encode %{ 9507 int vector_len = 0; 9508 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9509 %} 9510 ins_pipe( pipe_slow ); 9511 %} 9512 9513 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 9514 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9515 match(Set dst (OrV src1 src2)); 9516 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (32 bytes)" %} 9517 ins_encode %{ 9518 int vector_len = 1; 9519 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9520 %} 9521 ins_pipe( pipe_slow ); 9522 %} 9523 9524 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 9525 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9526 match(Set dst (OrV src (LoadVector mem))); 9527 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 9528 ins_encode %{ 9529 int vector_len = 1; 9530 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9531 %} 9532 ins_pipe( pipe_slow ); 9533 %} 9534 9535 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9536 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9537 match(Set dst (OrV src1 src2)); 9538 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 9539 ins_encode %{ 9540 int vector_len = 2; 9541 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9542 %} 9543 ins_pipe( pipe_slow ); 9544 %} 9545 9546 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 9547 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9548 match(Set dst (OrV src (LoadVector mem))); 9549 format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %} 9550 ins_encode %{ 9551 int vector_len = 2; 9552 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9553 %} 9554 ins_pipe( pipe_slow ); 9555 %} 9556 9557 // --------------------------------- XOR -------------------------------------- 9558 9559 instruct vxor4B(vecS dst, vecS src) %{ 9560 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4); 9561 match(Set dst (XorV dst src)); 9562 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 9563 ins_encode %{ 9564 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 9565 %} 9566 ins_pipe( pipe_slow ); 9567 %} 9568 9569 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 9570 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 9571 match(Set dst (XorV src1 src2)); 9572 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 9573 ins_encode %{ 9574 int vector_len = 0; 9575 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9576 %} 9577 ins_pipe( pipe_slow ); 9578 %} 9579 9580 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 9581 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 9582 match(Set dst (XorV src (LoadVector mem))); 9583 format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} 9584 ins_encode %{ 9585 int vector_len = 0; 9586 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9587 %} 9588 ins_pipe( pipe_slow ); 9589 %} 9590 9591 instruct vxor8B(vecD dst, vecD src) %{ 9592 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8); 9593 match(Set dst (XorV dst src)); 9594 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 9595 ins_encode %{ 9596 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 9597 %} 9598 ins_pipe( pipe_slow ); 9599 %} 9600 9601 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 9602 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 9603 match(Set dst (XorV src1 src2)); 9604 format %{ "vpxor $dst,$src1,$src2\t! 
xor vectors (8 bytes)" %} 9605 ins_encode %{ 9606 int vector_len = 0; 9607 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9608 %} 9609 ins_pipe( pipe_slow ); 9610 %} 9611 9612 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 9613 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 9614 match(Set dst (XorV src (LoadVector mem))); 9615 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 9616 ins_encode %{ 9617 int vector_len = 0; 9618 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9619 %} 9620 ins_pipe( pipe_slow ); 9621 %} 9622 9623 instruct vxor16B(vecX dst, vecX src) %{ 9624 predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16); 9625 match(Set dst (XorV dst src)); 9626 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 9627 ins_encode %{ 9628 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 9629 %} 9630 ins_pipe( pipe_slow ); 9631 %} 9632 9633 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 9634 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9635 match(Set dst (XorV src1 src2)); 9636 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 9637 ins_encode %{ 9638 int vector_len = 0; 9639 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9640 %} 9641 ins_pipe( pipe_slow ); 9642 %} 9643 9644 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 9645 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 9646 match(Set dst (XorV src (LoadVector mem))); 9647 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 9648 ins_encode %{ 9649 int vector_len = 0; 9650 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9651 %} 9652 ins_pipe( pipe_slow ); 9653 %} 9654 9655 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 9656 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9657 match(Set dst (XorV src1 src2)); 9658 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 9659 ins_encode %{ 9660 int vector_len = 1; 9661 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9662 %} 9663 ins_pipe( pipe_slow ); 9664 %} 9665 9666 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 9667 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 9668 match(Set dst (XorV src (LoadVector mem))); 9669 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 9670 ins_encode %{ 9671 int vector_len = 1; 9672 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9673 %} 9674 ins_pipe( pipe_slow ); 9675 %} 9676 9677 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9678 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9679 match(Set dst (XorV src1 src2)); 9680 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} 9681 ins_encode %{ 9682 int vector_len = 2; 9683 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9684 %} 9685 ins_pipe( pipe_slow ); 9686 %} 9687 9688 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ 9689 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 9690 match(Set dst (XorV src (LoadVector mem))); 9691 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (64 bytes)" %} 9692 ins_encode %{ 9693 int vector_len = 2; 9694 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9695 %} 9696 ins_pipe( pipe_slow ); 9697 %} 9698
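// Example: the AndV/OrV/XorV rules above key on length_in_bytes() rather than
// element count because pand/por/pxor and their vp* forms operate on raw bits,
// independent of lane width. A minimal Java sketch SuperWord may pack into
// these nodes (subject to flags and CPU features):
//
//   static void combine(int[] a, int[] b, int[] dst) {
//     for (int i = 0; i < a.length; i++) {
//       dst[i] = (a[i] & b[i]) | (a[i] ^ b[i]);   // AndV, XorV, OrV
//     }
//   }
//
// The *_mem variants fold the second input's LoadVector into the memory
// operand of the instruction, mirroring the reg,reg,mem encoding forms of
// vpand/vpor/vpxor.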
9699 // --------------------------------- FMA -------------------------------------- 9700 9701 // a * b + c 9702 instruct vfma2D_reg(vecX a, vecX b, vecX c) %{ 9703 predicate(UseFMA && n->as_Vector()->length() == 2); 9704 match(Set c (FmaVD c (Binary a b))); 9705 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 9706 ins_cost(150); 9707 ins_encode %{ 9708 int vector_len = 0; 9709 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 9710 %} 9711 ins_pipe( pipe_slow ); 9712 %} 9713 9714 // a * b + c 9715 instruct vfma2D_mem(vecX a, memory b, vecX c) %{ 9716 predicate(UseFMA && n->as_Vector()->length() == 2); 9717 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9718 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 9719 ins_cost(150); 9720 ins_encode %{ 9721 int vector_len = 0; 9722 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 9723 %} 9724 ins_pipe( pipe_slow ); 9725 %} 9726 9727 9728 // a * b + c 9729 instruct vfma4D_reg(vecY a, vecY b, vecY c) %{ 9730 predicate(UseFMA && n->as_Vector()->length() == 4); 9731 match(Set c (FmaVD c (Binary a b))); 9732 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 9733 ins_cost(150); 9734 ins_encode %{ 9735 int vector_len = 1; 9736 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 9737 %} 9738 ins_pipe( pipe_slow ); 9739 %} 9740 9741 // a * b + c 9742 instruct vfma4D_mem(vecY a, memory b, vecY c) %{ 9743 predicate(UseFMA && n->as_Vector()->length() == 4); 9744 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9745 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 9746 ins_cost(150); 9747 ins_encode %{ 9748 int vector_len = 1; 9749 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 9750 %} 9751 ins_pipe( pipe_slow ); 9752 %} 9753 9754 // a * b + c 9755 instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{ 9756 predicate(UseFMA && n->as_Vector()->length() == 8); 9757 match(Set c (FmaVD c (Binary a b))); 9758 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 9759 ins_cost(150); 9760 ins_encode %{ 9761 int vector_len = 2; 9762 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 9763 %} 9764 ins_pipe( pipe_slow ); 9765 %} 9766 9767 // a * b + c 9768 instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{ 9769 predicate(UseFMA && n->as_Vector()->length() == 8); 9770 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9771 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 9772 ins_cost(150); 9773 ins_encode %{ 9774 int vector_len = 2; 9775 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 9776 %} 9777 ins_pipe( pipe_slow ); 9778 %} 9779 9780 // a * b + c 9781 instruct vfma4F_reg(vecX a, vecX b, vecX c) %{ 9782 predicate(UseFMA && n->as_Vector()->length() == 4); 9783 match(Set c (FmaVF c (Binary a b))); 9784 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} 9785 ins_cost(150); 9786 ins_encode %{ 9787 int vector_len = 0; 9788 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 9789 %} 9790 ins_pipe( pipe_slow ); 9791 %} 9792 9793 // a * b + c 9794 instruct vfma4F_mem(vecX a, memory b, vecX c) %{ 9795 predicate(UseFMA && n->as_Vector()->length() == 4); 9796 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9797 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} 9798 ins_cost(150); 9799 ins_encode %{ 9800 int vector_len = 0; 9801 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 9802 %} 9803 ins_pipe( pipe_slow ); 9804 %} 9805 9806 // a * b + c 9807 instruct vfma8F_reg(vecY a, vecY b, vecY c) %{ 9808 predicate(UseFMA && n->as_Vector()->length() == 8); 9809 match(Set c (FmaVF c (Binary a b))); 9810 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} 9811 ins_cost(150); 9812 ins_encode %{ 9813 int vector_len = 1; 9814 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 9815 %} 9816 ins_pipe( pipe_slow ); 9817 %} 9818 9819 // a * b + c 9820 instruct vfma8F_mem(vecY a, memory b, vecY c) %{ 9821 predicate(UseFMA && n->as_Vector()->length() == 8); 9822 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9823 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} 9824 ins_cost(150); 9825 ins_encode %{ 9826 int vector_len = 1; 9827 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 9828 %} 9829 ins_pipe( pipe_slow ); 9830 %} 9831 9832 // a * b + c 9833 instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{ 9834 predicate(UseFMA && n->as_Vector()->length() == 16); 9835 match(Set c (FmaVF c (Binary a b))); 9836 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} 9837 ins_cost(150); 9838 ins_encode %{ 9839 int vector_len = 2; 9840 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 9841 %} 9842 ins_pipe( pipe_slow ); 9843 %} 9844 9845 // a * b + c 9846 instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{ 9847 predicate(UseFMA && n->as_Vector()->length() == 16); 9848 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9849 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} 9850 ins_cost(150); 9851 ins_encode %{ 9852 int vector_len = 2; 9853 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 9854 %} 9855 ins_pipe( pipe_slow ); 9856 %} 9857
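// Example: the FmaVD/FmaVF rules above require UseFMA, which the VM enables
// only on FMA-capable CPUs. Math.fma is intrinsified to scalar fma nodes that
// SuperWord can then pack (minimal sketch; subject to flags and CPU features):
//
//   static void fmaAll(double[] a, double[] b, double[] c) {
//     for (int i = 0; i < a.length; i++) {
//       c[i] = Math.fma(a[i], b[i], c[i]);   // FmaVD: c = a * b + c
//     }
//   }
//
// Note how match(Set c (FmaVD c (Binary a b))) passes $c to vfmad/vfmaf both
// as an input and as the destination: the accumulator is updated in place,
// matching the shape of the x86 FMA instructions.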
9858 // --------------------------------- PopCount -------------------------------------- 9859 9860 instruct vpopcount2I(vecD dst, vecD src) %{ 9861 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2); 9862 match(Set dst (PopCountVI src)); 9863 format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %} 9864 ins_encode %{ 9865 int vector_len = 0; 9866 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9867 %} 9868 ins_pipe( pipe_slow ); 9869 %} 9870 9871 instruct vpopcount4I(vecX dst, vecX src) %{ 9872 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4); 9873 match(Set dst (PopCountVI src)); 9874 format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %} 9875 ins_encode %{ 9876 int vector_len = 0; 9877 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9878 %} 9879 ins_pipe( pipe_slow ); 9880 %} 9881 9882 instruct vpopcount8I(vecY dst, vecY src) %{ 9883 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8); 9884 match(Set dst (PopCountVI src)); 9885 format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %} 9886 ins_encode %{ 9887 int vector_len = 1; 9888 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9889 %} 9890 ins_pipe( pipe_slow ); 9891 %} 9892 9893 instruct vpopcount16I(vecZ dst, vecZ src) %{ 9894 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16); 9895 match(Set dst (PopCountVI src)); 9896 format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %} 9897 ins_encode %{ 9898 int vector_len = 2; 9899 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9900 %} 9901 ins_pipe( pipe_slow ); 9902 %}
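// Example: the PopCountVI rules above are guarded by UsePopCountInstruction
// and VM_Version::supports_vpopcntdq(), so they apply only on
// AVX512_VPOPCNTDQ-capable hardware. Integer.bitCount is intrinsified to a
// scalar PopCountI, which SuperWord may then pack into the vpopcntd forms
// above (minimal sketch; subject to flags and CPU features):
//
//   static void bitCounts(int[] src, int[] dst) {
//     for (int i = 0; i < src.length; i++) {
//       dst[i] = Integer.bitCount(src[i]);   // PopCountVI -> vpopcntd
//     }
//   }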