//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
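// Reading the first definition below as an example: XMM0 is save-on-call
// under both the VM's and the C calling convention, has ideal register type
// Op_RegF, encoding 0, and is backed by the concrete register xmm0. The
// definitions XMM0b..XMM0p name the remaining fifteen float-sized slots of
// the same 512-bit register, reached via as_VMReg()->next(1) .. next(15).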
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64
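// The alloc_class below hands the whole XMM register file to the register
// allocator as a single chunk, in the order listed; RFLAGS then gets a
// chunk of its own, since (as noted after the list) the flags allocation
// class should be last.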
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
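// Each reg_class_dynamic selects its first (EVEX) register class when the
// embedded %{ ... %} C++ predicate evaluates true at runtime, and falls back
// to its second (legacy) class otherwise; the same pattern repeats for the
// double and vector classes below.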
// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                           );

// Class for restricted 512bit vector registers
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
#endif
                             );

reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
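// Each class below pins one concrete register, viewed at 128-bit (xmm),
// 256-bit (ymm) and 512-bit (zmm) width, so that an operand can be
// constrained to exactly that XMM register.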
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1102 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1103 1104 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1105 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1106 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1107 1108 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1109 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1110 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1111 1112 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1113 reg_class ymm3_reg(XMM3, XMM3b, 
XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1114 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1115 1116 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1117 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1118 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1119 1120 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1121 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1122 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1123 1124 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1125 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1126 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1127 1128 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1129 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1130 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1131 1132 #ifdef _LP64 1133 1134 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1135 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1136 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1137 1138 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1139 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1140 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1141 1142 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1143 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1144 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1145 1146 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1147 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1148 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1149 1150 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1151 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1152 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1153 1154 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1155 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1156 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1157 1158 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1159 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1160 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1161 1162 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1163 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1164 reg_class zmm15_reg(XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1165 1166 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1167 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1168 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1169 1170 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1171 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1172 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1173 1174 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1175 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1176 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1177 1178 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1179 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1180 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1181 1182 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1183 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1184 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1185 1186 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1187 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1188 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1189 1190 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1191 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1192 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1193 1194 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1195 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1196 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1197 1198 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1199 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1200 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1201 1202 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1203 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1204 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1205 1206 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1207 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1208 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1209 1210 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1211 reg_class ymm27_reg(XMM27, 
XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // Number of relocations needed by a call trampoline stub.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5-byte instructions plus one move for the unreachable address
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // Push "the_pc" on the stack without destroying any registers,
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}
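// An illustrative sketch of the LP64 sequence above (pseudo-assembly under the
// usual macro expansion; this is not additional emitted code):
//
//   the_pc:  call next        ; 5-byte disp32 call, pushes the address of "next"
//   next:    sub  [rsp], K    ; K = distance from the_pc to next
//            jmp  <deopt blob unpack entry>
//
// The word left on the stack therefore equals "the_pc" itself, obtained
// without touching any general-purpose register while all of them may be live.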

//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq())
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (!VM_Version::supports_on_spin_wait())
        ret_value = false;
      break;
    case Op_MulAddVS2VI:
      if (UseSSE < 2)
        ret_value = false;
      break;
#ifdef _LP64
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
#endif
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // Identify extra cases that we might want to provide match rules for,
  // e.g. Op_ vector nodes and other intrinsics, while guarding with vlen.
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && !VM_Version::supports_avx512bw())
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && !VM_Version::supports_avx512bw())
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
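// Worked example (illustrative arithmetic, assuming MaxVectorSize does not cap
// the result): with UseAVX == 2, size = (1 << 2) * 8 = 32 bytes, so
// max_vector_size(T_INT) = 32 / 4 = 8 int lanes; with UseAVX == 3 and
// avx512bw available, T_BYTE vectors reach (1 << 3) * 8 = 64 bytes, i.e.
// 64 byte lanes.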
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into a vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only the lowest bits of the xmm reg are used for the vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector loads and stores.
const bool Matcher::misaligned_vectors_ok() {
  return true;
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs.
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by a small constant as well.
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow the Matcher to match the rule which bypasses the
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in the addressing mode.
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for a screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions.
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}
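// Illustrative example of what this cloning enables (hypothetical node shape,
// not taken from a real compile): an int-array access commonly produces
//
//   AddP(base, AddP(base, base, LShiftX(ConvI2L(index), 2)), #16)
//
// Cloning the inner shift lets the matcher subsume the whole expression into
// a single x86 addressing mode, [base + index*4 + 16], instead of first
// materializing the scaled index in a register. clone_shift() only accepts
// shift counts 0..3, matching the hardware scale factors 1, 2, 4 and 8, and
// on LP64 it can additionally bypass ConvI2L when the index is known to be
// non-negative.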
// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM the size calculation is very complex, so instructions
  // are emitted into a scratch buffer to measure the actual size.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
#ifndef _LP64
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecY:
#ifndef _LP64
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as the SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM the size calculation is very complex, so instructions
  // are emitted into a scratch buffer to measure the actual size.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ?
1 : 4); 1922 } 1923 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1924 return size+offset_size; 1925 } 1926 1927 static inline jint replicate4_imm(int con, int width) { 1928 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1929 assert(width == 1 || width == 2, "only byte or short types here"); 1930 int bit_width = width * 8; 1931 jint val = con; 1932 val &= (1 << bit_width) - 1; // mask off sign bits 1933 while(bit_width < 32) { 1934 val |= (val << bit_width); 1935 bit_width <<= 1; 1936 } 1937 return val; 1938 } 1939 1940 static inline jlong replicate8_imm(int con, int width) { 1941 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 1942 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1943 int bit_width = width * 8; 1944 jlong val = con; 1945 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1946 while(bit_width < 64) { 1947 val |= (val << bit_width); 1948 bit_width <<= 1; 1949 } 1950 return val; 1951 } 1952 1953 #ifndef PRODUCT 1954 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1955 st->print("nop \t# %d bytes pad for loops and calls", _count); 1956 } 1957 #endif 1958 1959 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1960 MacroAssembler _masm(&cbuf); 1961 __ nop(_count); 1962 } 1963 1964 uint MachNopNode::size(PhaseRegAlloc*) const { 1965 return _count; 1966 } 1967 1968 #ifndef PRODUCT 1969 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1970 st->print("# breakpoint"); 1971 } 1972 #endif 1973 1974 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1975 MacroAssembler _masm(&cbuf); 1976 __ int3(); 1977 } 1978 1979 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1980 return MachNode::size(ra_); 1981 } 1982 1983 %} 1984 1985 encode %{ 1986 1987 enc_class call_epilog %{ 1988 if (VerifyStackAtCalls) { 1989 // Check that stack depth is unchanged: find majik cookie on stack 1990 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1991 MacroAssembler _masm(&cbuf); 1992 Label L; 1993 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1994 __ jccb(Assembler::equal, L); 1995 // Die if stack mismatch 1996 __ int3(); 1997 __ bind(L); 1998 } 1999 %} 2000 2001 %} 2002 2003 2004 //----------OPERANDS----------------------------------------------------------- 2005 // Operand definitions must precede instruction definitions for correct parsing 2006 // in the ADLC because operands constitute user defined types which are used in 2007 // instruction definitions. 
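// For example (illustrative reading of the definitions below): an instruct
// written against the vecZ operand can only be assigned registers from the
// vectorz_reg class, while legVecZ draws from vectorz_reg_vl, whose dynamic
// predicate falls back to the legacy XMM0-XMM15 subset whenever EVEX or
// AVX512VL support is absent.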
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_vl));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without the next two lines; the error is:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
    overflow     (0x20, "o");  // not really supported by the instruction
    no_overflow  (0x21, "no"); // not really supported by the instruction
  %}
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "ud2\t# ShouldNotReachHere" %}
  ins_encode %{
    __ ud2();
  %}
  ins_pipe(pipe_slow);
%}

// =================================EVEX special===============================

instruct setMask(rRegI dst, rRegI src) %{
  predicate(Matcher::has_predicated_vectors());
  match(Set dst (SetVectMaskI src));
  effect(TEMP dst);
  format %{ "setvectmask $dst, $src" %}
  ins_encode %{
    __ setvectmask($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __
vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2131 %} 2132 ins_pipe(pipe_slow); 2133 %} 2134 2135 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2136 predicate(UseAVX > 0); 2137 match(Set dst (AddF src con)); 2138 2139 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2140 ins_cost(150); 2141 ins_encode %{ 2142 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2143 %} 2144 ins_pipe(pipe_slow); 2145 %} 2146 2147 instruct addD_reg(regD dst, regD src) %{ 2148 predicate((UseSSE>=2) && (UseAVX == 0)); 2149 match(Set dst (AddD dst src)); 2150 2151 format %{ "addsd $dst, $src" %} 2152 ins_cost(150); 2153 ins_encode %{ 2154 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2155 %} 2156 ins_pipe(pipe_slow); 2157 %} 2158 2159 instruct addD_mem(regD dst, memory src) %{ 2160 predicate((UseSSE>=2) && (UseAVX == 0)); 2161 match(Set dst (AddD dst (LoadD src))); 2162 2163 format %{ "addsd $dst, $src" %} 2164 ins_cost(150); 2165 ins_encode %{ 2166 __ addsd($dst$$XMMRegister, $src$$Address); 2167 %} 2168 ins_pipe(pipe_slow); 2169 %} 2170 2171 instruct addD_imm(regD dst, immD con) %{ 2172 predicate((UseSSE>=2) && (UseAVX == 0)); 2173 match(Set dst (AddD dst con)); 2174 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2175 ins_cost(150); 2176 ins_encode %{ 2177 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2178 %} 2179 ins_pipe(pipe_slow); 2180 %} 2181 2182 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2183 predicate(UseAVX > 0); 2184 match(Set dst (AddD src1 src2)); 2185 2186 format %{ "vaddsd $dst, $src1, $src2" %} 2187 ins_cost(150); 2188 ins_encode %{ 2189 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2190 %} 2191 ins_pipe(pipe_slow); 2192 %} 2193 2194 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2195 predicate(UseAVX > 0); 2196 match(Set dst (AddD src1 (LoadD src2))); 2197 2198 format %{ "vaddsd $dst, $src1, $src2" %} 2199 ins_cost(150); 2200 ins_encode %{ 2201 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2202 %} 2203 ins_pipe(pipe_slow); 2204 %} 2205 2206 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2207 predicate(UseAVX > 0); 2208 match(Set dst (AddD src con)); 2209 2210 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2211 ins_cost(150); 2212 ins_encode %{ 2213 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2214 %} 2215 ins_pipe(pipe_slow); 2216 %} 2217 2218 instruct subF_reg(regF dst, regF src) %{ 2219 predicate((UseSSE>=1) && (UseAVX == 0)); 2220 match(Set dst (SubF dst src)); 2221 2222 format %{ "subss $dst, $src" %} 2223 ins_cost(150); 2224 ins_encode %{ 2225 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2226 %} 2227 ins_pipe(pipe_slow); 2228 %} 2229 2230 instruct subF_mem(regF dst, memory src) %{ 2231 predicate((UseSSE>=1) && (UseAVX == 0)); 2232 match(Set dst (SubF dst (LoadF src))); 2233 2234 format %{ "subss $dst, $src" %} 2235 ins_cost(150); 2236 ins_encode %{ 2237 __ subss($dst$$XMMRegister, $src$$Address); 2238 %} 2239 ins_pipe(pipe_slow); 2240 %} 2241 2242 instruct subF_imm(regF dst, immF con) %{ 2243 predicate((UseSSE>=1) && (UseAVX == 0)); 2244 match(Set dst (SubF dst con)); 2245 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2246 ins_cost(150); 2247 ins_encode %{ 2248 __ subss($dst$$XMMRegister, $constantaddress($con)); 2249 %} 2250 ins_pipe(pipe_slow); 2251 %} 2252 
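// A note on the pattern repeated by the scalar FP instructs in this section:
// the (UseAVX == 0) forms are destructive two-operand SSE instructions
// (dst = dst OP src), which is why they match "(OpF dst src)" with dst on
// both sides, while the (UseAVX > 0) forms use the non-destructive
// three-operand VEX encoding (dst = src1 OP src2), sparing the register
// allocator a copy when the first input is still live after the operation.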
2253 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2254 predicate(UseAVX > 0); 2255 match(Set dst (SubF src1 src2)); 2256 2257 format %{ "vsubss $dst, $src1, $src2" %} 2258 ins_cost(150); 2259 ins_encode %{ 2260 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2261 %} 2262 ins_pipe(pipe_slow); 2263 %} 2264 2265 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2266 predicate(UseAVX > 0); 2267 match(Set dst (SubF src1 (LoadF src2))); 2268 2269 format %{ "vsubss $dst, $src1, $src2" %} 2270 ins_cost(150); 2271 ins_encode %{ 2272 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2273 %} 2274 ins_pipe(pipe_slow); 2275 %} 2276 2277 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2278 predicate(UseAVX > 0); 2279 match(Set dst (SubF src con)); 2280 2281 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2282 ins_cost(150); 2283 ins_encode %{ 2284 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2285 %} 2286 ins_pipe(pipe_slow); 2287 %} 2288 2289 instruct subD_reg(regD dst, regD src) %{ 2290 predicate((UseSSE>=2) && (UseAVX == 0)); 2291 match(Set dst (SubD dst src)); 2292 2293 format %{ "subsd $dst, $src" %} 2294 ins_cost(150); 2295 ins_encode %{ 2296 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2297 %} 2298 ins_pipe(pipe_slow); 2299 %} 2300 2301 instruct subD_mem(regD dst, memory src) %{ 2302 predicate((UseSSE>=2) && (UseAVX == 0)); 2303 match(Set dst (SubD dst (LoadD src))); 2304 2305 format %{ "subsd $dst, $src" %} 2306 ins_cost(150); 2307 ins_encode %{ 2308 __ subsd($dst$$XMMRegister, $src$$Address); 2309 %} 2310 ins_pipe(pipe_slow); 2311 %} 2312 2313 instruct subD_imm(regD dst, immD con) %{ 2314 predicate((UseSSE>=2) && (UseAVX == 0)); 2315 match(Set dst (SubD dst con)); 2316 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2317 ins_cost(150); 2318 ins_encode %{ 2319 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2320 %} 2321 ins_pipe(pipe_slow); 2322 %} 2323 2324 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2325 predicate(UseAVX > 0); 2326 match(Set dst (SubD src1 src2)); 2327 2328 format %{ "vsubsd $dst, $src1, $src2" %} 2329 ins_cost(150); 2330 ins_encode %{ 2331 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2332 %} 2333 ins_pipe(pipe_slow); 2334 %} 2335 2336 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2337 predicate(UseAVX > 0); 2338 match(Set dst (SubD src1 (LoadD src2))); 2339 2340 format %{ "vsubsd $dst, $src1, $src2" %} 2341 ins_cost(150); 2342 ins_encode %{ 2343 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2344 %} 2345 ins_pipe(pipe_slow); 2346 %} 2347 2348 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2349 predicate(UseAVX > 0); 2350 match(Set dst (SubD src con)); 2351 2352 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2353 ins_cost(150); 2354 ins_encode %{ 2355 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2356 %} 2357 ins_pipe(pipe_slow); 2358 %} 2359 2360 instruct mulF_reg(regF dst, regF src) %{ 2361 predicate((UseSSE>=1) && (UseAVX == 0)); 2362 match(Set dst (MulF dst src)); 2363 2364 format %{ "mulss $dst, $src" %} 2365 ins_cost(150); 2366 ins_encode %{ 2367 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2368 %} 2369 ins_pipe(pipe_slow); 2370 %} 2371 2372 instruct mulF_mem(regF dst, memory src) %{ 2373 predicate((UseSSE>=1) && (UseAVX == 0)); 2374 
match(Set dst (MulF dst (LoadF src))); 2375 2376 format %{ "mulss $dst, $src" %} 2377 ins_cost(150); 2378 ins_encode %{ 2379 __ mulss($dst$$XMMRegister, $src$$Address); 2380 %} 2381 ins_pipe(pipe_slow); 2382 %} 2383 2384 instruct mulF_imm(regF dst, immF con) %{ 2385 predicate((UseSSE>=1) && (UseAVX == 0)); 2386 match(Set dst (MulF dst con)); 2387 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2388 ins_cost(150); 2389 ins_encode %{ 2390 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2391 %} 2392 ins_pipe(pipe_slow); 2393 %} 2394 2395 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2396 predicate(UseAVX > 0); 2397 match(Set dst (MulF src1 src2)); 2398 2399 format %{ "vmulss $dst, $src1, $src2" %} 2400 ins_cost(150); 2401 ins_encode %{ 2402 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2403 %} 2404 ins_pipe(pipe_slow); 2405 %} 2406 2407 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2408 predicate(UseAVX > 0); 2409 match(Set dst (MulF src1 (LoadF src2))); 2410 2411 format %{ "vmulss $dst, $src1, $src2" %} 2412 ins_cost(150); 2413 ins_encode %{ 2414 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2415 %} 2416 ins_pipe(pipe_slow); 2417 %} 2418 2419 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2420 predicate(UseAVX > 0); 2421 match(Set dst (MulF src con)); 2422 2423 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2424 ins_cost(150); 2425 ins_encode %{ 2426 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2427 %} 2428 ins_pipe(pipe_slow); 2429 %} 2430 2431 instruct mulD_reg(regD dst, regD src) %{ 2432 predicate((UseSSE>=2) && (UseAVX == 0)); 2433 match(Set dst (MulD dst src)); 2434 2435 format %{ "mulsd $dst, $src" %} 2436 ins_cost(150); 2437 ins_encode %{ 2438 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2439 %} 2440 ins_pipe(pipe_slow); 2441 %} 2442 2443 instruct mulD_mem(regD dst, memory src) %{ 2444 predicate((UseSSE>=2) && (UseAVX == 0)); 2445 match(Set dst (MulD dst (LoadD src))); 2446 2447 format %{ "mulsd $dst, $src" %} 2448 ins_cost(150); 2449 ins_encode %{ 2450 __ mulsd($dst$$XMMRegister, $src$$Address); 2451 %} 2452 ins_pipe(pipe_slow); 2453 %} 2454 2455 instruct mulD_imm(regD dst, immD con) %{ 2456 predicate((UseSSE>=2) && (UseAVX == 0)); 2457 match(Set dst (MulD dst con)); 2458 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2459 ins_cost(150); 2460 ins_encode %{ 2461 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2462 %} 2463 ins_pipe(pipe_slow); 2464 %} 2465 2466 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2467 predicate(UseAVX > 0); 2468 match(Set dst (MulD src1 src2)); 2469 2470 format %{ "vmulsd $dst, $src1, $src2" %} 2471 ins_cost(150); 2472 ins_encode %{ 2473 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2474 %} 2475 ins_pipe(pipe_slow); 2476 %} 2477 2478 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2479 predicate(UseAVX > 0); 2480 match(Set dst (MulD src1 (LoadD src2))); 2481 2482 format %{ "vmulsd $dst, $src1, $src2" %} 2483 ins_cost(150); 2484 ins_encode %{ 2485 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2486 %} 2487 ins_pipe(pipe_slow); 2488 %} 2489 2490 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2491 predicate(UseAVX > 0); 2492 match(Set dst (MulD src con)); 2493 2494 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" 
%} 2495 ins_cost(150); 2496 ins_encode %{ 2497 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2498 %} 2499 ins_pipe(pipe_slow); 2500 %} 2501 2502 instruct divF_reg(regF dst, regF src) %{ 2503 predicate((UseSSE>=1) && (UseAVX == 0)); 2504 match(Set dst (DivF dst src)); 2505 2506 format %{ "divss $dst, $src" %} 2507 ins_cost(150); 2508 ins_encode %{ 2509 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2510 %} 2511 ins_pipe(pipe_slow); 2512 %} 2513 2514 instruct divF_mem(regF dst, memory src) %{ 2515 predicate((UseSSE>=1) && (UseAVX == 0)); 2516 match(Set dst (DivF dst (LoadF src))); 2517 2518 format %{ "divss $dst, $src" %} 2519 ins_cost(150); 2520 ins_encode %{ 2521 __ divss($dst$$XMMRegister, $src$$Address); 2522 %} 2523 ins_pipe(pipe_slow); 2524 %} 2525 2526 instruct divF_imm(regF dst, immF con) %{ 2527 predicate((UseSSE>=1) && (UseAVX == 0)); 2528 match(Set dst (DivF dst con)); 2529 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2530 ins_cost(150); 2531 ins_encode %{ 2532 __ divss($dst$$XMMRegister, $constantaddress($con)); 2533 %} 2534 ins_pipe(pipe_slow); 2535 %} 2536 2537 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2538 predicate(UseAVX > 0); 2539 match(Set dst (DivF src1 src2)); 2540 2541 format %{ "vdivss $dst, $src1, $src2" %} 2542 ins_cost(150); 2543 ins_encode %{ 2544 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2545 %} 2546 ins_pipe(pipe_slow); 2547 %} 2548 2549 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2550 predicate(UseAVX > 0); 2551 match(Set dst (DivF src1 (LoadF src2))); 2552 2553 format %{ "vdivss $dst, $src1, $src2" %} 2554 ins_cost(150); 2555 ins_encode %{ 2556 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2557 %} 2558 ins_pipe(pipe_slow); 2559 %} 2560 2561 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2562 predicate(UseAVX > 0); 2563 match(Set dst (DivF src con)); 2564 2565 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2566 ins_cost(150); 2567 ins_encode %{ 2568 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2569 %} 2570 ins_pipe(pipe_slow); 2571 %} 2572 2573 instruct divD_reg(regD dst, regD src) %{ 2574 predicate((UseSSE>=2) && (UseAVX == 0)); 2575 match(Set dst (DivD dst src)); 2576 2577 format %{ "divsd $dst, $src" %} 2578 ins_cost(150); 2579 ins_encode %{ 2580 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2581 %} 2582 ins_pipe(pipe_slow); 2583 %} 2584 2585 instruct divD_mem(regD dst, memory src) %{ 2586 predicate((UseSSE>=2) && (UseAVX == 0)); 2587 match(Set dst (DivD dst (LoadD src))); 2588 2589 format %{ "divsd $dst, $src" %} 2590 ins_cost(150); 2591 ins_encode %{ 2592 __ divsd($dst$$XMMRegister, $src$$Address); 2593 %} 2594 ins_pipe(pipe_slow); 2595 %} 2596 2597 instruct divD_imm(regD dst, immD con) %{ 2598 predicate((UseSSE>=2) && (UseAVX == 0)); 2599 match(Set dst (DivD dst con)); 2600 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2601 ins_cost(150); 2602 ins_encode %{ 2603 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2604 %} 2605 ins_pipe(pipe_slow); 2606 %} 2607 2608 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2609 predicate(UseAVX > 0); 2610 match(Set dst (DivD src1 src2)); 2611 2612 format %{ "vdivsd $dst, $src1, $src2" %} 2613 ins_cost(150); 2614 ins_encode %{ 2615 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2616 %} 2617 ins_pipe(pipe_slow); 2618 %} 
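// The *_imm variants in this section read their FP literal straight from the
// method's constant table: $constantaddress($con) yields a reference to the
// table entry (RIP-relative on 64-bit), so e.g. divD_imm folds the load into
// the divide, roughly (illustrative) "divsd xmm0, [rip + <table offset>]",
// rather than routing the constant through an integer register.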
2619 2620 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2621 predicate(UseAVX > 0); 2622 match(Set dst (DivD src1 (LoadD src2))); 2623 2624 format %{ "vdivsd $dst, $src1, $src2" %} 2625 ins_cost(150); 2626 ins_encode %{ 2627 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2628 %} 2629 ins_pipe(pipe_slow); 2630 %} 2631 2632 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2633 predicate(UseAVX > 0); 2634 match(Set dst (DivD src con)); 2635 2636 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2637 ins_cost(150); 2638 ins_encode %{ 2639 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2640 %} 2641 ins_pipe(pipe_slow); 2642 %} 2643 2644 instruct absF_reg(regF dst) %{ 2645 predicate((UseSSE>=1) && (UseAVX == 0)); 2646 match(Set dst (AbsF dst)); 2647 ins_cost(150); 2648 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2649 ins_encode %{ 2650 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2651 %} 2652 ins_pipe(pipe_slow); 2653 %} 2654 2655 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2656 predicate(UseAVX > 0); 2657 match(Set dst (AbsF src)); 2658 ins_cost(150); 2659 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2660 ins_encode %{ 2661 int vector_len = 0; 2662 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2663 ExternalAddress(float_signmask()), vector_len); 2664 %} 2665 ins_pipe(pipe_slow); 2666 %} 2667 2668 instruct absD_reg(regD dst) %{ 2669 predicate((UseSSE>=2) && (UseAVX == 0)); 2670 match(Set dst (AbsD dst)); 2671 ins_cost(150); 2672 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2673 "# abs double by sign masking" %} 2674 ins_encode %{ 2675 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2676 %} 2677 ins_pipe(pipe_slow); 2678 %} 2679 2680 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2681 predicate(UseAVX > 0); 2682 match(Set dst (AbsD src)); 2683 ins_cost(150); 2684 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2685 "# abs double by sign masking" %} 2686 ins_encode %{ 2687 int vector_len = 0; 2688 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2689 ExternalAddress(double_signmask()), vector_len); 2690 %} 2691 ins_pipe(pipe_slow); 2692 %} 2693 2694 instruct negF_reg(regF dst) %{ 2695 predicate((UseSSE>=1) && (UseAVX == 0)); 2696 match(Set dst (NegF dst)); 2697 ins_cost(150); 2698 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2699 ins_encode %{ 2700 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2701 %} 2702 ins_pipe(pipe_slow); 2703 %} 2704 2705 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2706 predicate(UseAVX > 0); 2707 match(Set dst (NegF src)); 2708 ins_cost(150); 2709 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2710 ins_encode %{ 2711 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2712 ExternalAddress(float_signflip())); 2713 %} 2714 ins_pipe(pipe_slow); 2715 %} 2716 2717 instruct negD_reg(regD dst) %{ 2718 predicate((UseSSE>=2) && (UseAVX == 0)); 2719 match(Set dst (NegD dst)); 2720 ins_cost(150); 2721 format %{ "xorpd $dst, [0x8000000000000000]\t" 2722 "# neg double by sign flipping" %} 2723 ins_encode %{ 2724 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2725 %} 2726 ins_pipe(pipe_slow); 2727 %} 2728 2729 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 2730 predicate(UseAVX > 0); 2731 match(Set dst (NegD src)); 2732 ins_cost(150); 2733 format %{ "vnegatesd $dst, $src, 

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR INSTRUCTIONS=====================================


// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
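
// Vector operand classes used below scale with the payload size:
//   vecS = 4 bytes, vecD = 8 bytes, vecX = 16 bytes (XMM),
//   vecY = 32 bytes (YMM), vecZ = 64 bytes (ZMM);
// the legVec* variants are restricted to the legacy xmm0-xmm15 range.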

// Move vectors (4 bytes long)
instruct MoveVecS2Leg(legVecS dst, vecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (4 bytes long)
instruct MoveLeg2VecS(vecS dst, legVecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (8 bytes long)
instruct MoveVecD2Leg(legVecD dst, vecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (8 bytes long)
instruct MoveLeg2VecD(vecD dst, legVecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (16 bytes long)
instruct MoveVecX2Leg(legVecX dst, vecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (16 bytes long)
instruct MoveLeg2VecX(vecX dst, legVecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
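
// The Move*2Leg / MoveLeg2* rules shuttle a value between the full EVEX
// register file and the legacy xmm0-xmm15 subset. When AVX-512 is available
// without AVX512VL, the operand may live in xmm16-xmm31, which only an EVEX
// encoding can address, hence the evmovdquq branch in the 16- and 32-byte
// moves; otherwise a plain movss/movsd/(v)movdqu suffices.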

// Move vectors (32 bytes long)
instruct MoveVecY2Leg(legVecY dst, vecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (32 bytes long)
instruct MoveLeg2VecY(vecY dst, legVecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
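
// vector_len values passed to the assembler in the encodings above and
// below follow the existing Assembler convention:
//   0 = 128-bit, 1 = 256-bit, 2 = 512-bit operation width.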

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE=======================================

instruct Repl4B_mem(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
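
// The legacy byte broadcast builds the vector by doubling. A sketch of
// Repl16B for src = 0x000000AB (illustrative value only):
//   movd       dst,src    -> AB 00 00 00 ...        (byte in lane 0)
//   punpcklbw  dst,dst    -> AB AB 00 00 ...        (low word = ABAB)
//   pshuflw    dst,dst,0  -> ABAB in all four low words (low qword filled)
//   punpcklqdq dst,dst    -> AB in all 16 byte lanes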
replicate16B" %} 3150 ins_encode %{ 3151 __ movdl($dst$$XMMRegister, $src$$Register); 3152 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3153 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3154 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3155 %} 3156 ins_pipe( pipe_slow ); 3157 %} 3158 3159 instruct Repl16B_mem(vecX dst, memory mem) %{ 3160 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3161 match(Set dst (ReplicateB (LoadB mem))); 3162 format %{ "punpcklbw $dst,$mem\n\t" 3163 "pshuflw $dst,$dst,0x00\n\t" 3164 "punpcklqdq $dst,$dst\t! replicate16B" %} 3165 ins_encode %{ 3166 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3167 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3168 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3169 %} 3170 ins_pipe( pipe_slow ); 3171 %} 3172 3173 instruct Repl32B(vecY dst, rRegI src) %{ 3174 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3175 match(Set dst (ReplicateB src)); 3176 format %{ "movd $dst,$src\n\t" 3177 "punpcklbw $dst,$dst\n\t" 3178 "pshuflw $dst,$dst,0x00\n\t" 3179 "punpcklqdq $dst,$dst\n\t" 3180 "vinserti128_high $dst,$dst\t! replicate32B" %} 3181 ins_encode %{ 3182 __ movdl($dst$$XMMRegister, $src$$Register); 3183 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3184 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3185 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3186 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3187 %} 3188 ins_pipe( pipe_slow ); 3189 %} 3190 3191 instruct Repl32B_mem(vecY dst, memory mem) %{ 3192 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3193 match(Set dst (ReplicateB (LoadB mem))); 3194 format %{ "punpcklbw $dst,$mem\n\t" 3195 "pshuflw $dst,$dst,0x00\n\t" 3196 "punpcklqdq $dst,$dst\n\t" 3197 "vinserti128_high $dst,$dst\t! replicate32B" %} 3198 ins_encode %{ 3199 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3200 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3201 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3202 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3203 %} 3204 ins_pipe( pipe_slow ); 3205 %} 3206 3207 instruct Repl64B(legVecZ dst, rRegI src) %{ 3208 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3209 match(Set dst (ReplicateB src)); 3210 format %{ "movd $dst,$src\n\t" 3211 "punpcklbw $dst,$dst\n\t" 3212 "pshuflw $dst,$dst,0x00\n\t" 3213 "punpcklqdq $dst,$dst\n\t" 3214 "vinserti128_high $dst,$dst\t" 3215 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} 3216 ins_encode %{ 3217 __ movdl($dst$$XMMRegister, $src$$Register); 3218 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3219 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3220 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3221 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3222 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3223 %} 3224 ins_pipe( pipe_slow ); 3225 %} 3226 3227 instruct Repl64B_mem(legVecZ dst, memory mem) %{ 3228 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3229 match(Set dst (ReplicateB (LoadB mem))); 3230 format %{ "punpcklbw $dst,$mem\n\t" 3231 "pshuflw $dst,$dst,0x00\n\t" 3232 "punpcklqdq $dst,$dst\n\t" 3233 "vinserti128_high $dst,$dst\t" 3234 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate64B" %} 3235 ins_encode %{ 3236 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3237 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3238 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3239 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3240 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3241 %} 3242 ins_pipe( pipe_slow ); 3243 %} 3244 3245 instruct Repl16B_imm(vecX dst, immI con) %{ 3246 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3247 match(Set dst (ReplicateB con)); 3248 format %{ "movq $dst,[$constantaddress]\n\t" 3249 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3250 ins_encode %{ 3251 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3252 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3253 %} 3254 ins_pipe( pipe_slow ); 3255 %} 3256 3257 instruct Repl32B_imm(vecY dst, immI con) %{ 3258 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3259 match(Set dst (ReplicateB con)); 3260 format %{ "movq $dst,[$constantaddress]\n\t" 3261 "punpcklqdq $dst,$dst\n\t" 3262 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3263 ins_encode %{ 3264 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3265 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3266 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3267 %} 3268 ins_pipe( pipe_slow ); 3269 %} 3270 3271 instruct Repl64B_imm(legVecZ dst, immI con) %{ 3272 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3273 match(Set dst (ReplicateB con)); 3274 format %{ "movq $dst,[$constantaddress]\n\t" 3275 "punpcklqdq $dst,$dst\n\t" 3276 "vinserti128_high $dst,$dst\t" 3277 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %} 3278 ins_encode %{ 3279 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3280 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3281 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3282 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3283 %} 3284 ins_pipe( pipe_slow ); 3285 %} 3286 3287 instruct Repl4S(vecD dst, rRegI src) %{ 3288 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3289 match(Set dst (ReplicateS src)); 3290 format %{ "movd $dst,$src\n\t" 3291 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3292 ins_encode %{ 3293 __ movdl($dst$$XMMRegister, $src$$Register); 3294 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3295 %} 3296 ins_pipe( pipe_slow ); 3297 %} 3298 3299 instruct Repl4S_mem(vecD dst, memory mem) %{ 3300 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3301 match(Set dst (ReplicateS (LoadS mem))); 3302 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3303 ins_encode %{ 3304 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3305 %} 3306 ins_pipe( pipe_slow ); 3307 %} 3308 3309 instruct Repl8S(vecX dst, rRegI src) %{ 3310 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3311 match(Set dst (ReplicateS src)); 3312 format %{ "movd $dst,$src\n\t" 3313 "pshuflw $dst,$dst,0x00\n\t" 3314 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3315 ins_encode %{ 3316 __ movdl($dst$$XMMRegister, $src$$Register); 3317 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3318 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3319 %} 3320 ins_pipe( pipe_slow ); 3321 %} 3322 3323 instruct Repl8S_mem(vecX dst, memory mem) %{ 3324 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3325 match(Set dst (ReplicateS (LoadS mem))); 3326 format %{ "pshuflw $dst,$mem,0x00\n\t" 3327 "punpcklqdq $dst,$dst\t! replicate8S" %} 3328 ins_encode %{ 3329 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3330 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3331 %} 3332 ins_pipe( pipe_slow ); 3333 %} 3334 3335 instruct Repl8S_imm(vecX dst, immI con) %{ 3336 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3337 match(Set dst (ReplicateS con)); 3338 format %{ "movq $dst,[$constantaddress]\n\t" 3339 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3340 ins_encode %{ 3341 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3342 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3343 %} 3344 ins_pipe( pipe_slow ); 3345 %} 3346 3347 instruct Repl16S(vecY dst, rRegI src) %{ 3348 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3349 match(Set dst (ReplicateS src)); 3350 format %{ "movd $dst,$src\n\t" 3351 "pshuflw $dst,$dst,0x00\n\t" 3352 "punpcklqdq $dst,$dst\n\t" 3353 "vinserti128_high $dst,$dst\t! replicate16S" %} 3354 ins_encode %{ 3355 __ movdl($dst$$XMMRegister, $src$$Register); 3356 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3357 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3358 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3359 %} 3360 ins_pipe( pipe_slow ); 3361 %} 3362 3363 instruct Repl16S_mem(vecY dst, memory mem) %{ 3364 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3365 match(Set dst (ReplicateS (LoadS mem))); 3366 format %{ "pshuflw $dst,$mem,0x00\n\t" 3367 "punpcklqdq $dst,$dst\n\t" 3368 "vinserti128_high $dst,$dst\t! replicate16S" %} 3369 ins_encode %{ 3370 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3371 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3372 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3373 %} 3374 ins_pipe( pipe_slow ); 3375 %} 3376 3377 instruct Repl16S_imm(vecY dst, immI con) %{ 3378 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3379 match(Set dst (ReplicateS con)); 3380 format %{ "movq $dst,[$constantaddress]\n\t" 3381 "punpcklqdq $dst,$dst\n\t" 3382 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3383 ins_encode %{ 3384 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3385 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3386 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3387 %} 3388 ins_pipe( pipe_slow ); 3389 %} 3390 3391 instruct Repl32S(legVecZ dst, rRegI src) %{ 3392 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3393 match(Set dst (ReplicateS src)); 3394 format %{ "movd $dst,$src\n\t" 3395 "pshuflw $dst,$dst,0x00\n\t" 3396 "punpcklqdq $dst,$dst\n\t" 3397 "vinserti128_high $dst,$dst\t" 3398 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate32S" %} 3399 ins_encode %{ 3400 __ movdl($dst$$XMMRegister, $src$$Register); 3401 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3402 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3403 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3404 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3405 %} 3406 ins_pipe( pipe_slow ); 3407 %} 3408 3409 instruct Repl32S_mem(legVecZ dst, memory mem) %{ 3410 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3411 match(Set dst (ReplicateS (LoadS mem))); 3412 format %{ "pshuflw $dst,$mem,0x00\n\t" 3413 "punpcklqdq $dst,$dst\n\t" 3414 "vinserti128_high $dst,$dst\t" 3415 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3416 ins_encode %{ 3417 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3418 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3419 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3420 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3421 %} 3422 ins_pipe( pipe_slow ); 3423 %} 3424 3425 instruct Repl32S_imm(legVecZ dst, immI con) %{ 3426 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3427 match(Set dst (ReplicateS con)); 3428 format %{ "movq $dst,[$constantaddress]\n\t" 3429 "punpcklqdq $dst,$dst\n\t" 3430 "vinserti128_high $dst,$dst\t" 3431 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} 3432 ins_encode %{ 3433 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3434 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3435 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3436 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3437 %} 3438 ins_pipe( pipe_slow ); 3439 %} 3440 3441 instruct Repl4I(vecX dst, rRegI src) %{ 3442 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3443 match(Set dst (ReplicateI src)); 3444 format %{ "movd $dst,$src\n\t" 3445 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3446 ins_encode %{ 3447 __ movdl($dst$$XMMRegister, $src$$Register); 3448 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3449 %} 3450 ins_pipe( pipe_slow ); 3451 %} 3452 3453 instruct Repl4I_mem(vecX dst, memory mem) %{ 3454 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3455 match(Set dst (ReplicateI (LoadI mem))); 3456 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3457 ins_encode %{ 3458 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3459 %} 3460 ins_pipe( pipe_slow ); 3461 %} 3462 3463 instruct Repl8I(vecY dst, rRegI src) %{ 3464 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3465 match(Set dst (ReplicateI src)); 3466 format %{ "movd $dst,$src\n\t" 3467 "pshufd $dst,$dst,0x00\n\t" 3468 "vinserti128_high $dst,$dst\t! replicate8I" %} 3469 ins_encode %{ 3470 __ movdl($dst$$XMMRegister, $src$$Register); 3471 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3472 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3473 %} 3474 ins_pipe( pipe_slow ); 3475 %} 3476 3477 instruct Repl8I_mem(vecY dst, memory mem) %{ 3478 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3479 match(Set dst (ReplicateI (LoadI mem))); 3480 format %{ "pshufd $dst,$mem,0x00\n\t" 3481 "vinserti128_high $dst,$dst\t! 
replicate8I" %} 3482 ins_encode %{ 3483 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3484 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3485 %} 3486 ins_pipe( pipe_slow ); 3487 %} 3488 3489 instruct Repl16I(legVecZ dst, rRegI src) %{ 3490 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3491 match(Set dst (ReplicateI src)); 3492 format %{ "movd $dst,$src\n\t" 3493 "pshufd $dst,$dst,0x00\n\t" 3494 "vinserti128_high $dst,$dst\t" 3495 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3496 ins_encode %{ 3497 __ movdl($dst$$XMMRegister, $src$$Register); 3498 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3499 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3500 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3501 %} 3502 ins_pipe( pipe_slow ); 3503 %} 3504 3505 instruct Repl16I_mem(legVecZ dst, memory mem) %{ 3506 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3507 match(Set dst (ReplicateI (LoadI mem))); 3508 format %{ "pshufd $dst,$mem,0x00\n\t" 3509 "vinserti128_high $dst,$dst\t" 3510 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3511 ins_encode %{ 3512 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3513 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3514 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3515 %} 3516 ins_pipe( pipe_slow ); 3517 %} 3518 3519 instruct Repl4I_imm(vecX dst, immI con) %{ 3520 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3521 match(Set dst (ReplicateI con)); 3522 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3523 "punpcklqdq $dst,$dst" %} 3524 ins_encode %{ 3525 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3526 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3527 %} 3528 ins_pipe( pipe_slow ); 3529 %} 3530 3531 instruct Repl8I_imm(vecY dst, immI con) %{ 3532 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3533 match(Set dst (ReplicateI con)); 3534 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3535 "punpcklqdq $dst,$dst\n\t" 3536 "vinserti128_high $dst,$dst" %} 3537 ins_encode %{ 3538 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3539 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3540 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3541 %} 3542 ins_pipe( pipe_slow ); 3543 %} 3544 3545 instruct Repl16I_imm(legVecZ dst, immI con) %{ 3546 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3547 match(Set dst (ReplicateI con)); 3548 format %{ "movq $dst,[$constantaddress]\t" 3549 "punpcklqdq $dst,$dst\n\t" 3550 "vinserti128_high $dst,$dst" 3551 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %} 3552 ins_encode %{ 3553 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3554 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3555 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3556 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3557 %} 3558 ins_pipe( pipe_slow ); 3559 %} 3560 3561 // Long could be loaded into xmm register directly from memory. 3562 instruct Repl2L_mem(vecX dst, memory mem) %{ 3563 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3564 match(Set dst (ReplicateL (LoadL mem))); 3565 format %{ "movq $dst,$mem\n\t" 3566 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3567 ins_encode %{ 3568 __ movq($dst$$XMMRegister, $mem$$Address); 3569 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3570 %} 3571 ins_pipe( pipe_slow ); 3572 %} 3573 3574 // Replicate long (8 byte) scalar to be vector 3575 #ifdef _LP64 3576 instruct Repl4L(vecY dst, rRegL src) %{ 3577 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3578 match(Set dst (ReplicateL src)); 3579 format %{ "movdq $dst,$src\n\t" 3580 "punpcklqdq $dst,$dst\n\t" 3581 "vinserti128_high $dst,$dst\t! replicate4L" %} 3582 ins_encode %{ 3583 __ movdq($dst$$XMMRegister, $src$$Register); 3584 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3585 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3586 %} 3587 ins_pipe( pipe_slow ); 3588 %} 3589 3590 instruct Repl8L(legVecZ dst, rRegL src) %{ 3591 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3592 match(Set dst (ReplicateL src)); 3593 format %{ "movdq $dst,$src\n\t" 3594 "punpcklqdq $dst,$dst\n\t" 3595 "vinserti128_high $dst,$dst\t" 3596 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3597 ins_encode %{ 3598 __ movdq($dst$$XMMRegister, $src$$Register); 3599 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3600 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3601 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3602 %} 3603 ins_pipe( pipe_slow ); 3604 %} 3605 #else // _LP64 3606 instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{ 3607 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3608 match(Set dst (ReplicateL src)); 3609 effect(TEMP dst, USE src, TEMP tmp); 3610 format %{ "movdl $dst,$src.lo\n\t" 3611 "movdl $tmp,$src.hi\n\t" 3612 "punpckldq $dst,$tmp\n\t" 3613 "punpcklqdq $dst,$dst\n\t" 3614 "vinserti128_high $dst,$dst\t! replicate4L" %} 3615 ins_encode %{ 3616 __ movdl($dst$$XMMRegister, $src$$Register); 3617 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3618 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3619 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3620 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3621 %} 3622 ins_pipe( pipe_slow ); 3623 %} 3624 3625 instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{ 3626 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3627 match(Set dst (ReplicateL src)); 3628 effect(TEMP dst, USE src, TEMP tmp); 3629 format %{ "movdl $dst,$src.lo\n\t" 3630 "movdl $tmp,$src.hi\n\t" 3631 "punpckldq $dst,$tmp\n\t" 3632 "punpcklqdq $dst,$dst\n\t" 3633 "vinserti128_high $dst,$dst\t" 3634 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3635 ins_encode %{ 3636 __ movdl($dst$$XMMRegister, $src$$Register); 3637 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3638 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3639 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3640 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3641 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3642 %} 3643 ins_pipe( pipe_slow ); 3644 %} 3645 #endif // _LP64 3646 3647 instruct Repl4L_imm(vecY dst, immL con) %{ 3648 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3649 match(Set dst (ReplicateL con)); 3650 format %{ "movq $dst,[$constantaddress]\n\t" 3651 "punpcklqdq $dst,$dst\n\t" 3652 "vinserti128_high $dst,$dst\t! 

instruct Repl8L_imm(legVecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
replicate8F" %} 3732 ins_encode %{ 3733 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3734 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3735 %} 3736 ins_pipe( pipe_slow ); 3737 %} 3738 3739 instruct Repl8F_mem(vecY dst, memory mem) %{ 3740 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3741 match(Set dst (ReplicateF (LoadF mem))); 3742 format %{ "pshufd $dst,$mem,0x00\n\t" 3743 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3744 ins_encode %{ 3745 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3746 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3747 %} 3748 ins_pipe( pipe_slow ); 3749 %} 3750 3751 instruct Repl16F(legVecZ dst, vlRegF src) %{ 3752 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3753 match(Set dst (ReplicateF src)); 3754 format %{ "pshufd $dst,$src,0x00\n\t" 3755 "vinsertf128_high $dst,$dst\t" 3756 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3757 ins_encode %{ 3758 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3759 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3760 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 3765 instruct Repl16F_mem(legVecZ dst, memory mem) %{ 3766 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3767 match(Set dst (ReplicateF (LoadF mem))); 3768 format %{ "pshufd $dst,$mem,0x00\n\t" 3769 "vinsertf128_high $dst,$dst\t" 3770 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3771 ins_encode %{ 3772 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3773 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3774 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3775 %} 3776 ins_pipe( pipe_slow ); 3777 %} 3778 3779 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3780 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3781 match(Set dst (ReplicateF zero)); 3782 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3783 ins_encode %{ 3784 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3785 %} 3786 ins_pipe( fpu_reg_reg ); 3787 %} 3788 3789 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3790 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3791 match(Set dst (ReplicateF zero)); 3792 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3793 ins_encode %{ 3794 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3795 %} 3796 ins_pipe( fpu_reg_reg ); 3797 %} 3798 3799 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3800 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3801 match(Set dst (ReplicateF zero)); 3802 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3803 ins_encode %{ 3804 int vector_len = 1; 3805 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3806 %} 3807 ins_pipe( fpu_reg_reg ); 3808 %} 3809 3810 instruct Repl2D_mem(vecX dst, memory mem) %{ 3811 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3812 match(Set dst (ReplicateD (LoadD mem))); 3813 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3814 ins_encode %{ 3815 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 3820 instruct Repl4D(vecY dst, vlRegD src) %{ 3821 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3822 match(Set dst (ReplicateD src)); 3823 format %{ "pshufd $dst,$src,0x44\n\t" 3824 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3825 ins_encode %{ 3826 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3827 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3828 %} 3829 ins_pipe( pipe_slow ); 3830 %} 3831 3832 instruct Repl4D_mem(vecY dst, memory mem) %{ 3833 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3834 match(Set dst (ReplicateD (LoadD mem))); 3835 format %{ "pshufd $dst,$mem,0x44\n\t" 3836 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3837 ins_encode %{ 3838 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3839 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3840 %} 3841 ins_pipe( pipe_slow ); 3842 %} 3843 3844 instruct Repl8D(legVecZ dst, vlRegD src) %{ 3845 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3846 match(Set dst (ReplicateD src)); 3847 format %{ "pshufd $dst,$src,0x44\n\t" 3848 "vinsertf128_high $dst,$dst\t" 3849 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3850 ins_encode %{ 3851 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3852 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3853 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3854 %} 3855 ins_pipe( pipe_slow ); 3856 %} 3857 3858 instruct Repl8D_mem(legVecZ dst, memory mem) %{ 3859 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3860 match(Set dst (ReplicateD (LoadD mem))); 3861 format %{ "pshufd $dst,$mem,0x44\n\t" 3862 "vinsertf128_high $dst,$dst\t" 3863 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3864 ins_encode %{ 3865 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3866 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3867 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3868 %} 3869 ins_pipe( pipe_slow ); 3870 %} 3871 3872 // Replicate double (8 byte) scalar zero to be vector 3873 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3874 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3875 match(Set dst (ReplicateD zero)); 3876 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3877 ins_encode %{ 3878 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3879 %} 3880 ins_pipe( fpu_reg_reg ); 3881 %} 3882 3883 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3884 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3885 match(Set dst (ReplicateD zero)); 3886 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3887 ins_encode %{ 3888 int vector_len = 1; 3889 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3890 %} 3891 ins_pipe( fpu_reg_reg ); 3892 %} 3893 3894 // ====================GENERIC REPLICATE========================================== 3895 3896 // Replicate byte scalar to be vector 3897 instruct Repl4B(vecS dst, rRegI src) %{ 3898 predicate(n->as_Vector()->length() == 4); 3899 match(Set dst (ReplicateB src)); 3900 format %{ "movd $dst,$src\n\t" 3901 "punpcklbw $dst,$dst\n\t" 3902 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3903 ins_encode %{ 3904 __ movdl($dst$$XMMRegister, $src$$Register); 3905 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3906 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3907 %} 3908 ins_pipe( pipe_slow ); 3909 %} 3910 3911 instruct Repl8B(vecD dst, rRegI src) %{ 3912 predicate(n->as_Vector()->length() == 8); 3913 match(Set dst (ReplicateB src)); 3914 format %{ "movd $dst,$src\n\t" 3915 "punpcklbw $dst,$dst\n\t" 3916 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3917 ins_encode %{ 3918 __ movdl($dst$$XMMRegister, $src$$Register); 3919 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3920 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3921 %} 3922 ins_pipe( pipe_slow ); 3923 %} 3924 3925 // Replicate byte scalar immediate to be vector by loading from const table. 3926 instruct Repl4B_imm(vecS dst, immI con) %{ 3927 predicate(n->as_Vector()->length() == 4); 3928 match(Set dst (ReplicateB con)); 3929 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3930 ins_encode %{ 3931 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3932 %} 3933 ins_pipe( pipe_slow ); 3934 %} 3935 3936 instruct Repl8B_imm(vecD dst, immI con) %{ 3937 predicate(n->as_Vector()->length() == 8); 3938 match(Set dst (ReplicateB con)); 3939 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3940 ins_encode %{ 3941 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3942 %} 3943 ins_pipe( pipe_slow ); 3944 %} 3945 3946 // Replicate byte scalar zero to be vector 3947 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3948 predicate(n->as_Vector()->length() == 4); 3949 match(Set dst (ReplicateB zero)); 3950 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3951 ins_encode %{ 3952 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3953 %} 3954 ins_pipe( fpu_reg_reg ); 3955 %} 3956 3957 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3958 predicate(n->as_Vector()->length() == 8); 3959 match(Set dst (ReplicateB zero)); 3960 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3961 ins_encode %{ 3962 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3963 %} 3964 ins_pipe( fpu_reg_reg ); 3965 %} 3966 3967 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3968 predicate(n->as_Vector()->length() == 16); 3969 match(Set dst (ReplicateB zero)); 3970 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3971 ins_encode %{ 3972 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3973 %} 3974 ins_pipe( fpu_reg_reg ); 3975 %} 3976 3977 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3978 predicate(n->as_Vector()->length() == 32); 3979 match(Set dst (ReplicateB zero)); 3980 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3981 ins_encode %{ 3982 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3983 int vector_len = 1; 3984 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3985 %} 3986 ins_pipe( fpu_reg_reg ); 3987 %} 3988 3989 // Replicate char/short (2 byte) scalar to be vector 3990 instruct Repl2S(vecS dst, rRegI src) %{ 3991 predicate(n->as_Vector()->length() == 2); 3992 match(Set dst (ReplicateS src)); 3993 format %{ "movd $dst,$src\n\t" 3994 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3995 ins_encode %{ 3996 __ movdl($dst$$XMMRegister, $src$$Register); 3997 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3998 %} 3999 ins_pipe( fpu_reg_reg ); 4000 %} 4001 4002 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 4003 instruct Repl2S_imm(vecS dst, immI con) %{ 4004 predicate(n->as_Vector()->length() == 2); 4005 match(Set dst (ReplicateS con)); 4006 format %{ "movdl $dst,[$constantaddress]\t! 

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX the MacroAssembler's vpxor
    // falls back to vxorpd.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// An integer can be loaded into an XMM register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX the MacroAssembler's vpxor
    // falls back to vxorpd.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX the MacroAssembler's vpxor
    // falls back to vxorpd.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
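
// Zeroing with (v)pxor dst,dst above is the standard dependency-breaking
// idiom: the result is all zeroes regardless of the previous contents, and
// modern cores recognize the pattern and execute it without reading dst.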

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================

instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
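
// From here on, EVEX-capable hardware replaces the legacy multi-instruction
// ladders with single broadcasts: evpbroadcast{b,w,d} splats directly from
// a general-purpose register, while vpbroadcast{b,w,d} splats from memory
// or from another XMM register.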
replicate32B" %} 4286 ins_encode %{ 4287 int vector_len = 1; 4288 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4289 %} 4290 ins_pipe( pipe_slow ); 4291 %} 4292 4293 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4294 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4295 match(Set dst (ReplicateB (LoadB mem))); 4296 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4297 ins_encode %{ 4298 int vector_len = 1; 4299 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4300 %} 4301 ins_pipe( pipe_slow ); 4302 %} 4303 4304 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4305 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4306 match(Set dst (ReplicateB src)); 4307 format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %} 4308 ins_encode %{ 4309 int vector_len = 2; 4310 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4311 %} 4312 ins_pipe( pipe_slow ); 4313 %} 4314 4315 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4316 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4317 match(Set dst (ReplicateB (LoadB mem))); 4318 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4319 ins_encode %{ 4320 int vector_len = 2; 4321 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4322 %} 4323 ins_pipe( pipe_slow ); 4324 %} 4325 4326 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4327 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4328 match(Set dst (ReplicateB con)); 4329 format %{ "movq $dst,[$constantaddress]\n\t" 4330 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4331 ins_encode %{ 4332 int vector_len = 0; 4333 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4334 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4335 %} 4336 ins_pipe( pipe_slow ); 4337 %} 4338 4339 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4340 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4341 match(Set dst (ReplicateB con)); 4342 format %{ "movq $dst,[$constantaddress]\n\t" 4343 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4344 ins_encode %{ 4345 int vector_len = 1; 4346 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4347 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4348 %} 4349 ins_pipe( pipe_slow ); 4350 %} 4351 4352 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4353 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4354 match(Set dst (ReplicateB con)); 4355 format %{ "movq $dst,[$constantaddress]\n\t" 4356 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4357 ins_encode %{ 4358 int vector_len = 2; 4359 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4360 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4361 %} 4362 ins_pipe( pipe_slow ); 4363 %} 4364 4365 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4366 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4367 match(Set dst (ReplicateB zero)); 4368 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4369 ins_encode %{ 4370 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 
4371 int vector_len = 2; 4372 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4373 %} 4374 ins_pipe( fpu_reg_reg ); 4375 %} 4376 4377 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4378 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4379 match(Set dst (ReplicateS src)); 4380 format %{ "evpbroadcastw $dst,$src\t! replicate4S" %} 4381 ins_encode %{ 4382 int vector_len = 0; 4383 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4384 %} 4385 ins_pipe( pipe_slow ); 4386 %} 4387 4388 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4389 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4390 match(Set dst (ReplicateS (LoadS mem))); 4391 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4392 ins_encode %{ 4393 int vector_len = 0; 4394 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4395 %} 4396 ins_pipe( pipe_slow ); 4397 %} 4398 4399 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4400 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4401 match(Set dst (ReplicateS src)); 4402 format %{ "evpbroadcastw $dst,$src\t! replicate8S" %} 4403 ins_encode %{ 4404 int vector_len = 0; 4405 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4406 %} 4407 ins_pipe( pipe_slow ); 4408 %} 4409 4410 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4411 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4412 match(Set dst (ReplicateS (LoadS mem))); 4413 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4414 ins_encode %{ 4415 int vector_len = 0; 4416 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4417 %} 4418 ins_pipe( pipe_slow ); 4419 %} 4420 4421 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4422 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4423 match(Set dst (ReplicateS src)); 4424 format %{ "evpbroadcastw $dst,$src\t! replicate16S" %} 4425 ins_encode %{ 4426 int vector_len = 1; 4427 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4428 %} 4429 ins_pipe( pipe_slow ); 4430 %} 4431 4432 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4433 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4434 match(Set dst (ReplicateS (LoadS mem))); 4435 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4436 ins_encode %{ 4437 int vector_len = 1; 4438 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4439 %} 4440 ins_pipe( pipe_slow ); 4441 %} 4442 4443 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4444 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4445 match(Set dst (ReplicateS src)); 4446 format %{ "evpbroadcastw $dst,$src\t! replicate32S" %} 4447 ins_encode %{ 4448 int vector_len = 2; 4449 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4450 %} 4451 ins_pipe( pipe_slow ); 4452 %} 4453 4454 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4455 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4456 match(Set dst (ReplicateS (LoadS mem))); 4457 format %{ "vpbroadcastw $dst,$mem\t! 
replicate32S" %} 4458 ins_encode %{ 4459 int vector_len = 2; 4460 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4461 %} 4462 ins_pipe( pipe_slow ); 4463 %} 4464 4465 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4466 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4467 match(Set dst (ReplicateS con)); 4468 format %{ "movq $dst,[$constantaddress]\n\t" 4469 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4470 ins_encode %{ 4471 int vector_len = 0; 4472 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4473 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4474 %} 4475 ins_pipe( pipe_slow ); 4476 %} 4477 4478 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4479 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4480 match(Set dst (ReplicateS con)); 4481 format %{ "movq $dst,[$constantaddress]\n\t" 4482 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4483 ins_encode %{ 4484 int vector_len = 1; 4485 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4486 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4487 %} 4488 ins_pipe( pipe_slow ); 4489 %} 4490 4491 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4492 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4493 match(Set dst (ReplicateS con)); 4494 format %{ "movq $dst,[$constantaddress]\n\t" 4495 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4496 ins_encode %{ 4497 int vector_len = 2; 4498 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4499 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4500 %} 4501 ins_pipe( pipe_slow ); 4502 %} 4503 4504 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4505 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4506 match(Set dst (ReplicateS zero)); 4507 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4508 ins_encode %{ 4509 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4510 int vector_len = 2; 4511 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4512 %} 4513 ins_pipe( fpu_reg_reg ); 4514 %} 4515 4516 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4517 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4518 match(Set dst (ReplicateI src)); 4519 format %{ "evpbroadcastd $dst,$src\t! replicate4I" %} 4520 ins_encode %{ 4521 int vector_len = 0; 4522 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4523 %} 4524 ins_pipe( pipe_slow ); 4525 %} 4526 4527 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4528 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4529 match(Set dst (ReplicateI (LoadI mem))); 4530 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4531 ins_encode %{ 4532 int vector_len = 0; 4533 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4534 %} 4535 ins_pipe( pipe_slow ); 4536 %} 4537 4538 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4539 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4540 match(Set dst (ReplicateI src)); 4541 format %{ "evpbroadcastd $dst,$src\t! 
replicate8I" %} 4542 ins_encode %{ 4543 int vector_len = 1; 4544 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4545 %} 4546 ins_pipe( pipe_slow ); 4547 %} 4548 4549 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4550 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4551 match(Set dst (ReplicateI (LoadI mem))); 4552 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4553 ins_encode %{ 4554 int vector_len = 1; 4555 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4556 %} 4557 ins_pipe( pipe_slow ); 4558 %} 4559 4560 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4561 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4562 match(Set dst (ReplicateI src)); 4563 format %{ "evpbroadcastd $dst,$src\t! replicate16I" %} 4564 ins_encode %{ 4565 int vector_len = 2; 4566 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4567 %} 4568 ins_pipe( pipe_slow ); 4569 %} 4570 4571 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4572 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4573 match(Set dst (ReplicateI (LoadI mem))); 4574 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4575 ins_encode %{ 4576 int vector_len = 2; 4577 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4578 %} 4579 ins_pipe( pipe_slow ); 4580 %} 4581 4582 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4583 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4584 match(Set dst (ReplicateI con)); 4585 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4586 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4587 ins_encode %{ 4588 int vector_len = 0; 4589 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4590 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4591 %} 4592 ins_pipe( pipe_slow ); 4593 %} 4594 4595 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4596 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4597 match(Set dst (ReplicateI con)); 4598 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4599 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4600 ins_encode %{ 4601 int vector_len = 1; 4602 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4603 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4604 %} 4605 ins_pipe( pipe_slow ); 4606 %} 4607 4608 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4609 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4610 match(Set dst (ReplicateI con)); 4611 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4612 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4613 ins_encode %{ 4614 int vector_len = 2; 4615 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4616 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4617 %} 4618 ins_pipe( pipe_slow ); 4619 %} 4620 4621 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4622 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4623 match(Set dst (ReplicateI zero)); 4624 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4625 ins_encode %{ 4626 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 
4627 int vector_len = 2; 4628 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4629 %} 4630 ins_pipe( fpu_reg_reg ); 4631 %} 4632 4633 // Replicate long (8 byte) scalar to be vector 4634 #ifdef _LP64 4635 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4636 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4637 match(Set dst (ReplicateL src)); 4638 format %{ "evpbroadcastq $dst,$src\t! replicate4L" %} 4639 ins_encode %{ 4640 int vector_len = 1; 4641 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4642 %} 4643 ins_pipe( pipe_slow ); 4644 %} 4645 4646 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4647 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4648 match(Set dst (ReplicateL src)); 4649 format %{ "evpbroadcastq $dst,$src\t! replicate8L" %} 4650 ins_encode %{ 4651 int vector_len = 2; 4652 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4653 %} 4654 ins_pipe( pipe_slow ); 4655 %} 4656 #else // _LP64 4657 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4658 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4659 match(Set dst (ReplicateL src)); 4660 effect(TEMP dst, USE src, TEMP tmp); 4661 format %{ "movdl $dst,$src.lo\n\t" 4662 "movdl $tmp,$src.hi\n\t" 4663 "punpckldq $dst,$tmp\n\t" 4664 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4665 ins_encode %{ 4666 int vector_len = 1; 4667 __ movdl($dst$$XMMRegister, $src$$Register); 4668 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4669 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4670 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4671 %} 4672 ins_pipe( pipe_slow ); 4673 %} 4674 4675 instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{ 4676 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4677 match(Set dst (ReplicateL src)); 4678 effect(TEMP dst, USE src, TEMP tmp); 4679 format %{ "movdl $dst,$src.lo\n\t" 4680 "movdl $tmp,$src.hi\n\t" 4681 "punpckldq $dst,$tmp\n\t" 4682 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4683 ins_encode %{ 4684 int vector_len = 2; 4685 __ movdl($dst$$XMMRegister, $src$$Register); 4686 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4687 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4688 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4689 %} 4690 ins_pipe( pipe_slow ); 4691 %} 4692 #endif // _LP64 4693 4694 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4695 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4696 match(Set dst (ReplicateL con)); 4697 format %{ "movq $dst,[$constantaddress]\n\t" 4698 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4699 ins_encode %{ 4700 int vector_len = 1; 4701 __ movq($dst$$XMMRegister, $constantaddress($con)); 4702 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4703 %} 4704 ins_pipe( pipe_slow ); 4705 %} 4706 4707 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4708 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4709 match(Set dst (ReplicateL con)); 4710 format %{ "movq $dst,[$constantaddress]\n\t" 4711 "vpbroadcastq $dst,$dst\t! 
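// On 32-bit (!_LP64) targets, a long arrives in a GPR pair, so the variants
// above first assemble the scalar in an XMM register before broadcasting:
// movdl moves the low and high 32-bit halves into two XMM registers,
// punpckldq interleaves them into a single 64-bit lane, and vpbroadcastq
// replicates that lane. Roughly (illustrative C sketch, not generated code):
//   uint64_t lane = ((uint64_t)src_hi << 32) | src_lo;  // movdl + punpckldq
//   for (int i = 0; i < n; i++) dst[i] = lane;          // vpbroadcastq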
instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // 512-bit vpxor is an EVEX-only encoding; the UseAVX > 2 predicate guarantees EVEX is available here.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: this is a 512-bit operation and the 512-bit vxorps encoding requires AVX512DQ.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: this is a 512-bit operation and the 512-bit vxorps encoding requires AVX512DQ.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: this is a 512-bit operation and the 512-bit vxorps encoding requires AVX512DQ.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: this is a 512-bit operation and the 512-bit vxorps encoding requires AVX512DQ.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: this is a 512-bit operation and the 512-bit vxorpd encoding requires AVX512DQ.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: this is a 512-bit operation and the 512-bit vxorpd encoding requires AVX512DQ.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd: this is a 512-bit operation and the 512-bit vxorpd encoding requires AVX512DQ.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================
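// A reduction folds the scalar src1 and every lane of the vector src2 into a
// single scalar result. The scalar-equivalent semantics of the patterns below
// are (illustrative C sketch, OP standing for the node's operation):
//   int result = src1;
//   for (int i = 0; i < n; i++) result = result OP src2[i];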
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextracti128_high $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
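// For 256-bit and wider operands, the AVX2/EVEX reduction patterns below halve
// the problem repeatedly: extract the high half (vextracti128_high or
// vextracti64x4_high), combine it with the low half, then keep folding with
// pshufd until a single lane remains, and finally fold in src1. Illustrative
// sketch for eight int lanes:
//   v4 = lo128(v8) + hi128(v8);   // vextracti128_high + vpaddd
//   v2 = v4 + shuffle(v4, 0xE);   // pshufd + vpaddd
//   v1 = v2 + shuffle(v2, 0x1);   // pshufd + vpaddd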
instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
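// Floating-point addition is not associative, so unlike the integer reductions
// above, the FP reduction patterns below accumulate strictly lane by lane
// (dst += src2[0]; dst += src2[1]; ...); pairwise halving would reorder the
// sums and could change the result relative to the scalar loop being replaced.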
instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "addss $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
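// Integer multiply reductions need packed-multiply support: the 32-bit lane
// patterns below use pmulld/vpmulld (an SSE4.1 instruction, hence the
// UseSSE > 3 predicate), and the 64-bit lane patterns further down use
// vpmullq, which is why they also require VM_Version::supports_avx512dq().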
mul reduction2I" %} 5502 ins_encode %{ 5503 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5504 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5505 __ movdl($tmp$$XMMRegister, $src1$$Register); 5506 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5507 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5508 %} 5509 ins_pipe( pipe_slow ); 5510 %} 5511 5512 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5513 predicate(UseAVX > 0); 5514 match(Set dst (MulReductionVI src1 src2)); 5515 effect(TEMP tmp, TEMP tmp2); 5516 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5517 "vpmulld $tmp,$src2,$tmp2\n\t" 5518 "movd $tmp2,$src1\n\t" 5519 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5520 "movd $dst,$tmp2\t! mul reduction2I" %} 5521 ins_encode %{ 5522 int vector_len = 0; 5523 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5524 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5525 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5526 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5527 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5528 %} 5529 ins_pipe( pipe_slow ); 5530 %} 5531 5532 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5533 predicate(UseSSE > 3 && UseAVX == 0); 5534 match(Set dst (MulReductionVI src1 src2)); 5535 effect(TEMP tmp, TEMP tmp2); 5536 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5537 "pmulld $tmp2,$src2\n\t" 5538 "pshufd $tmp,$tmp2,0x1\n\t" 5539 "pmulld $tmp2,$tmp\n\t" 5540 "movd $tmp,$src1\n\t" 5541 "pmulld $tmp2,$tmp\n\t" 5542 "movd $dst,$tmp2\t! mul reduction4I" %} 5543 ins_encode %{ 5544 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5545 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5546 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5547 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5548 __ movdl($tmp$$XMMRegister, $src1$$Register); 5549 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5550 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5551 %} 5552 ins_pipe( pipe_slow ); 5553 %} 5554 5555 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5556 predicate(UseAVX > 0); 5557 match(Set dst (MulReductionVI src1 src2)); 5558 effect(TEMP tmp, TEMP tmp2); 5559 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5560 "vpmulld $tmp,$src2,$tmp2\n\t" 5561 "pshufd $tmp2,$tmp,0x1\n\t" 5562 "vpmulld $tmp,$tmp,$tmp2\n\t" 5563 "movd $tmp2,$src1\n\t" 5564 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5565 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5566 ins_encode %{ 5567 int vector_len = 0; 5568 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5569 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5570 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5571 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5572 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5573 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5574 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5575 %} 5576 ins_pipe( pipe_slow ); 5577 %} 5578 5579 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5580 predicate(UseAVX > 1); 5581 match(Set dst (MulReductionVI src1 src2)); 5582 effect(TEMP tmp, TEMP tmp2); 5583 format %{ "vextracti128_high $tmp,$src2\n\t" 5584 "vpmulld $tmp,$tmp,$src2\n\t" 5585 "pshufd $tmp2,$tmp,0xE\n\t" 5586 "vpmulld $tmp,$tmp,$tmp2\n\t" 5587 "pshufd $tmp2,$tmp,0x1\n\t" 5588 "vpmulld $tmp,$tmp,$tmp2\n\t" 5589 "movd $tmp2,$src1\n\t" 5590 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5591 "movd $dst,$tmp2\t! mul reduction8I" %} 5592 ins_encode %{ 5593 int vector_len = 0; 5594 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5595 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5596 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5597 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5598 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5599 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5600 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5601 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5602 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5603 %} 5604 ins_pipe( pipe_slow ); 5605 %} 5606 5607 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5608 predicate(UseAVX > 2); 5609 match(Set dst (MulReductionVI src1 src2)); 5610 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5611 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5612 "vpmulld $tmp3,$tmp3,$src2\n\t" 5613 "vextracti128_high $tmp,$tmp3\n\t" 5614 "vpmulld $tmp,$tmp,$src2\n\t" 5615 "pshufd $tmp2,$tmp,0xE\n\t" 5616 "vpmulld $tmp,$tmp,$tmp2\n\t" 5617 "pshufd $tmp2,$tmp,0x1\n\t" 5618 "vpmulld $tmp,$tmp,$tmp2\n\t" 5619 "movd $tmp2,$src1\n\t" 5620 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5621 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5622 ins_encode %{ 5623 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5624 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5625 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5626 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5627 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5628 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5629 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5630 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5631 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5632 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5633 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5634 %} 5635 ins_pipe( pipe_slow ); 5636 %} 5637 5638 #ifdef _LP64 5639 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5640 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5641 match(Set dst (MulReductionVL src1 src2)); 5642 effect(TEMP tmp, TEMP tmp2); 5643 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5644 "vpmullq $tmp,$src2,$tmp2\n\t" 5645 "movdq $tmp2,$src1\n\t" 5646 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5647 "movdq $dst,$tmp2\t! mul reduction2L" %} 5648 ins_encode %{ 5649 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5650 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5651 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5652 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5653 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5654 %} 5655 ins_pipe( pipe_slow ); 5656 %} 5657 5658 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5659 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5660 match(Set dst (MulReductionVL src1 src2)); 5661 effect(TEMP tmp, TEMP tmp2); 5662 format %{ "vextracti128_high $tmp,$src2\n\t" 5663 "vpmullq $tmp2,$tmp,$src2\n\t" 5664 "pshufd $tmp,$tmp2,0xE\n\t" 5665 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5666 "movdq $tmp,$src1\n\t" 5667 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5668 "movdq $dst,$tmp2\t! mul reduction4L" %} 5669 ins_encode %{ 5670 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5671 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5672 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5673 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5674 __ movdq($tmp$$XMMRegister, $src1$$Register); 5675 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5676 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5677 %} 5678 ins_pipe( pipe_slow ); 5679 %} 5680 5681 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5682 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5683 match(Set dst (MulReductionVL src1 src2)); 5684 effect(TEMP tmp, TEMP tmp2); 5685 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5686 "vpmullq $tmp2,$tmp2,$src2\n\t" 5687 "vextracti128_high $tmp,$tmp2\n\t" 5688 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5689 "pshufd $tmp,$tmp2,0xE\n\t" 5690 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5691 "movdq $tmp,$src1\n\t" 5692 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5693 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5694 ins_encode %{ 5695 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5696 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5697 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5698 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5699 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5700 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5701 __ movdq($tmp$$XMMRegister, $src1$$Register); 5702 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5703 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5704 %} 5705 ins_pipe( pipe_slow ); 5706 %} 5707 #endif 5708 5709 instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{ 5710 predicate(UseSSE >= 1 && UseAVX == 0); 5711 match(Set dst (MulReductionVF dst src2)); 5712 effect(TEMP dst, TEMP tmp); 5713 format %{ "mulss $dst,$src2\n\t" 5714 "pshufd $tmp,$src2,0x01\n\t" 5715 "mulss $dst,$tmp\t! mul reduction2F" %} 5716 ins_encode %{ 5717 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5718 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5719 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5720 %} 5721 ins_pipe( pipe_slow ); 5722 %} 5723 5724 instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5725 predicate(UseAVX > 0); 5726 match(Set dst (MulReductionVF dst src2)); 5727 effect(TEMP tmp, TEMP dst); 5728 format %{ "vmulss $dst,$dst,$src2\n\t" 5729 "pshufd $tmp,$src2,0x01\n\t" 5730 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5731 ins_encode %{ 5732 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5733 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5734 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5735 %} 5736 ins_pipe( pipe_slow ); 5737 %} 5738 5739 instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5740 predicate(UseSSE >= 1 && UseAVX == 0); 5741 match(Set dst (MulReductionVF dst src2)); 5742 effect(TEMP dst, TEMP tmp); 5743 format %{ "mulss $dst,$src2\n\t" 5744 "pshufd $tmp,$src2,0x01\n\t" 5745 "mulss $dst,$tmp\n\t" 5746 "pshufd $tmp,$src2,0x02\n\t" 5747 "mulss $dst,$tmp\n\t" 5748 "pshufd $tmp,$src2,0x03\n\t" 5749 "mulss $dst,$tmp\t! mul reduction4F" %} 5750 ins_encode %{ 5751 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5752 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5753 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5754 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5755 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5756 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5757 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5758 %} 5759 ins_pipe( pipe_slow ); 5760 %} 5761 5762 instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5763 predicate(UseAVX > 0); 5764 match(Set dst (MulReductionVF dst src2)); 5765 effect(TEMP tmp, TEMP dst); 5766 format %{ "vmulss $dst,$dst,$src2\n\t" 5767 "pshufd $tmp,$src2,0x01\n\t" 5768 "vmulss $dst,$dst,$tmp\n\t" 5769 "pshufd $tmp,$src2,0x02\n\t" 5770 "vmulss $dst,$dst,$tmp\n\t" 5771 "pshufd $tmp,$src2,0x03\n\t" 5772 "vmulss $dst,$dst,$tmp\t! 

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$src2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0x01\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x02\n\t"
            "vmulss  $dst,$dst,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0x03\n\t"
            "vmulss  $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
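
// vextractf32x4 $tmp2,$src2,imm copies the imm-th 128-bit lane of a ZMM
// register into an XMM register (imm 0x0-0x3 for a 512-bit vector), which is
// how rvmul16F above walks the four quadrants of the vector. For orientation,
// the matching C intrinsic is _mm512_extractf32x4_ps (AVX-512F).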

instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd   $dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "mulsd   $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd  $dst,$dst,$src2\n\t"
            "pshufd  $tmp,$src2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd  $dst,$dst,$tmp2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
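
// Note the legVecZ operand class on the 512-bit reductions above (rvmul8L,
// rvmul16F, rvmul8D): these sequences mix EVEX-encoded instructions with
// legacy/VEX-encoded ones such as pshufd and mulsd/vmulss, which cannot
// address XMM16-XMM31, so the register allocator is restricted to the first
// sixteen XMM registers for these rules.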

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
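
// Each vector op below comes in up to three flavors, selected by predicate: a
// destructive two-operand SSE form (dst op= src), a non-destructive
// three-operand AVX register form, and an AVX form that folds a vector load
// into the op via the (LoadVector mem) match. vector_len selects the VEX/EVEX
// vector length: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit. Roughly, at the
// assembly level (for orientation only):
//
//   paddb  xmm1, xmm2          ; SSE2: dst is also a source
//   vpaddb xmm1, xmm2, xmm3    ; AVX:  three-operand (vector_len = 0)
//   vpaddb ymm1, ymm2, [mem]   ; AVX2: load folded in (vector_len = 1)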

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
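
// The 512-bit byte and short forms additionally require
// VM_Version::supports_avx512bw(): AVX-512 Foundation only provides dword and
// qword element arithmetic, while byte and word element sizes (vpaddb/vpaddw
// and friends) were added by the AVX512BW extension. The 512-bit int and long
// rules below (vadd16I, vadd8L) therefore need no such check.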

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
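
// The SUB rules mirror the ADD rules above one-for-one, using psub*/subps/
// subpd and their vpsub*/vsub* AVX forms. Operand order matters here, since
// subtraction is not commutative: the memory-operand forms always compute
// src - mem, never mem - src.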

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd   $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd   $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq   $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps   $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps   $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd   $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd  $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw  $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
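
// pmullw/vpmullw keep the low 16 bits of each 32-bit lane product, which is
// the correct truncating result for both Java short and char multiplies: the
// low half of the product is identical for signed and unsigned operands.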
// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
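// Note (editorial, illustrative only): the UseSSE > 3 predicates above
// reflect that a packed 32-bit low multiply first appeared as the SSE4.1
// pmulld; SSE2 has no direct equivalent. A minimal standalone C++ sketch
// of the same lane-wise operation via intrinsics (assumes SSE4.1):
//
//   #include <smmintrin.h>
//   // Lane-wise: r[i] = (int)(a[i] * b[i]), the operation vmul4I emits.
//   static inline __m128i mul_packed4I(__m128i a, __m128i b) {
//     return _mm_mullo_epi32(a, b);   // pmulld
//   }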
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
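// Note (editorial, illustrative only): a packed 64-bit low multiply only
// exists as the EVEX-encoded vpmullq, which is why every MulVL rule above
// also tests VM_Version::supports_avx512dq(). A minimal standalone C++
// sketch via intrinsics (assumes a compiler targeting AVX-512DQ + VL):
//
//   #include <immintrin.h>
//   // Lane-wise: r[i] = (long)(a[i] * b[i]), the operation vmul2L emits.
//   static inline __m128i mul_packed2L(__m128i a, __m128i b) {
//     return _mm_mullo_epi64(a, b);   // vpmullq
//   }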
instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
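// Note (editorial, illustrative only): the vcmov rules above build a
// vector conditional move from two instructions: a packed compare writes
// an all-ones/all-zeros mask per lane, then a variable blend picks each
// lane from one of the two sources under that mask. A minimal standalone
// C++ sketch of the same idiom (assumes AVX intrinsics; the less-than
// condition stands in for the $cop condition code):
//
//   #include <immintrin.h>
//   // Lane-wise: out[i] = (src1[i] < src2[i]) ? src2[i] : src1[i]
//   static inline __m256 cmove8F_lt(__m256 src1, __m256 src2) {
//     __m256 mask = _mm256_cmp_ps(src1, src2, _CMP_LT_OQ); // vcmpps
//     return _mm256_blendv_ps(src1, src2, mask);           // vblendvps
//   }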
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only the lowest bits of the xmm register are used for the count);
// the sketch after vshiftcnt below illustrates this.
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
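// Note (editorial, illustrative only): the SSE2 variable-shift
// instructions read their count from the low bits of an XMM register,
// and the left and right forms read it the same way, so the single
// movdl above can feed both LShiftCntV and RShiftCntV users. A minimal
// standalone C++ sketch (assumes SSE2 intrinsics):
//
//   #include <emmintrin.h>
//   static inline void shift_both(__m128i v, int n, __m128i* l, __m128i* r) {
//     __m128i cnt = _mm_cvtsi32_si128(n);  // movd, as in vshiftcnt
//     *l = _mm_sll_epi32(v, cnt);          // pslld xmm,xmm
//     *r = _mm_srl_epi32(v, cnt);          // psrld xmm,xmm
//   }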
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsqrt16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before the shift. But char vectors are fine, since chars
// are unsigned values. The illustration below works through one example.
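// A small standalone C++ illustration (editorial, not part of this file)
// of the comment above: Java sign-extends a short to int before >>>, so
// the scalar result, re-narrowed to 16 bits, can disagree with a 16-bit
// lane shift such as psrlw; a char is zero-extended, so the two agree.
//
//   #include <cstdint>
//   int main() {
//     int16_t  s = -4;                                            // 0xFFFC
//     uint16_t scalar = (uint16_t)(((uint32_t)(int32_t)s) >> 2);  // 0xFFFF
//     uint16_t lane   = (uint16_t)((uint16_t)s >> 2);             // 0x3FFF
//     uint16_t c        = 0xFFFC;                   // Java char: unsigned
//     uint16_t scalar_c = (uint16_t)(((uint32_t)c) >> 2);         // 0x3FFF
//     uint16_t lane_c   = (uint16_t)(c >> 2);                     // 0x3FFF
//     return (scalar != lane) && (scalar_c == lane_c);  // returns 1
//   }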
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
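
// Example (illustrative Java source, not part of this file): the same
// pattern over a long[] produces URShiftVL; which register class is used
// (vecX, vecY or vecZ) follows the vector length the loop is unrolled to,
// gated by the UseAVX level in the predicates above:
//
//   static void shr(long[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] >>>= s;    // URShiftVL after vectorization
//     }
//   }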

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
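
// Example (illustrative Java source, not part of this file): a signed
// shift over a short[] must preserve the sign bit, so it maps to RShiftVS
// and the psraw/vpsraw rules above rather than the logical-shift rules:
//
//   static void sar(short[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = (short)(a[i] >> s);    // RShiftVS after vectorization
//     }
//   }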

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
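
// Example (illustrative Java source, not part of this file): signed int
// shifts map to RShiftVI and the psrad/vpsrad rules above:
//
//   static void sar(int[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] >>= s;     // RShiftVI after vectorization
//     }
//   }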

// There are no longs vector arithmetic right shift instructions
// (x86 lacks a packed-64 psraq; AVX-512 adds an EVEX-encoded vpsraq,
// which is not used here).

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Note: the predicate previously tested length_in_bytes() == 4 here,
// which contradicts the 8-byte operands; corrected to 8.
instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
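
// Example (illustrative Java source, not part of this file): bitwise ops
// over byte arrays vectorize to AndV/OrV/XorV, matched by the rules in the
// three sections above; the *_mem rules fold the load of the second
// operand into the AVX instruction:
//
//   static void mask(byte[] a, byte[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = (byte)(a[i] ^ b[i]);   // XorV (likewise & -> AndV, | -> OrV)
//     }
//   }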

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
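
// Example (illustrative Java source, not part of this file): with UseFMA,
// vectorized Math.fma calls become FmaVF/FmaVD nodes matched by the rules
// above, preserving the single-rounding a * b + c semantics:
//
//   static void fma(float[] a, float[] b, float[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = Math.fma(a[i], b[i], c[i]);   // FmaVF: c = a * b + c
//     }
//   }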

// --------------------------------- Vector Multiply Add --------------------------------------

instruct smuladd4S2I_reg(vecD dst, vecD src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct smuladd8S4I_reg(vecX dst, vecX src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
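
// Example (illustrative Java source, not part of this file): a short
// dot-product accumulation can vectorize to MulAddVS2VI (pmaddwd), which
// multiplies adjacent short pairs and sums each pair into an int lane.
// When that result then feeds an AddVI of the accumulator, the AVX-512
// VNNI rules in the next section fuse both steps into one evpdpwssd:
//
//   static int dot(short[] a, short[] b) {
//     int s = 0;
//     for (int i = 0; i < a.length; i++) {
//       s += a[i] * b[i];             // MulAddVS2VI + AddVI
//     }
//     return s;
//   }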

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
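
// Example (illustrative Java source, not part of this file): when the CPU
// reports AVX512_VPOPCNTDQ and UsePopCountInstruction is on, a bit-count
// loop vectorizes to PopCountVI and the vpopcntd rules above:
//
//   static void pop(int[] a) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = Integer.bitCount(a[i]);   // PopCountVI -> vpopcntd
//     }
//   }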