//
// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
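
// For illustration (a hypothetical example, not one of this file's
// definitions): a save-on-entry 32-bit integer register would be declared
//   reg_def RBX(SOE, SOE, Op_RegI, 3, rbx->as_VMReg());
// i.e. the allocator must preserve it across the method, spills of it use
// LoadI/StoreI, and 3 is the bit-pattern emitted into opcodes that use it.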

// XMM registers. 512-bit registers, 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
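
// Note: each lettered slice above is a separate, independently allocatable
// 32-bit VMReg: XMM0 names word (a) of the hardware register, XMM0b word (b),
// and so on through XMM0p, so a Double occupies the (a,b) pair and a full
// 512-bit vector occupies all sixteen slices.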

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64
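
// A note on RFLAGS (editorial, inferred from the definition above): it is
// given VMRegImpl::Bad() as its VMReg, so the condition codes have no
// frame/oop-map location; its encoding (16 on LP64, 8 on 32-bit) simply
// follows the last general-purpose register number.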

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
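
// Note on the reg_class_dynamic declarations (editorial): each one resolves
// to its first (EVEX) class when the %{ ... %} predicate holds at VM startup
// and to the legacy class otherwise, so match rules written against e.g.
// vectors_reg gain XMM16-XMM31 only on EVEX-capable hardware.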

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1102 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1103 1104 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1105 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1106 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1107 1108 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1109 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1110 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1111 1112 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1113 reg_class ymm3_reg(XMM3, XMM3b, 
XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1114 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1115 1116 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1117 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1118 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1119 1120 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1121 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1122 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1123 1124 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1125 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1126 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1127 1128 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1129 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1130 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1131 1132 #ifdef _LP64 1133 1134 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1135 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1136 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1137 1138 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1139 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1140 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1141 1142 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1143 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1144 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1145 1146 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1147 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1148 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1149 1150 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1151 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1152 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1153 1154 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1155 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1156 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1157 1158 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1159 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1160 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1161 1162 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1163 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1164 reg_class zmm15_reg(XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1165 1166 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1167 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1168 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1169 1170 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1171 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1172 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1173 1174 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1175 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1176 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1177 1178 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1179 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1180 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1181 1182 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1183 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1184 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1185 1186 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1187 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1188 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1189 1190 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1191 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1192 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1193 1194 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1195 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1196 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1197 1198 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1199 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1200 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1201 1202 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1203 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1204 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1205 1206 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1207 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1208 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1209 1210 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1211 reg_class ymm27_reg(XMM27, 
XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h); 1212 reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p); 1213 1214 reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d); 1215 reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h); 1216 reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p); 1217 1218 reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d); 1219 reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h); 1220 reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p); 1221 1222 reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d); 1223 reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h); 1224 reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p); 1225 1226 reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d); 1227 reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); 1228 reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 1229 1230 #endif 1231 1232 %} 1233 1234 1235 //----------SOURCE BLOCK------------------------------------------------------- 1236 // This is a block of C++ code which provides values, functions, and 1237 // definitions necessary in the rest of the architecture description 1238 1239 source_hpp %{ 1240 // Header information of the source block. 1241 // Method declarations/definitions which are used outside 1242 // the ad-scope can conveniently be defined here. 1243 // 1244 // To keep related declarations/definitions/uses close together, 1245 // we switch between source %{ %} and source_hpp %{ %} freely as needed. 1246 1247 class NativeJump; 1248 1249 class CallStubImpl { 1250 1251 //-------------------------------------------------------------- 1252 //---< Used for optimization in Compile::shorten_branches >--- 1253 //-------------------------------------------------------------- 1254 1255 public: 1256 // Size of call trampoline stub. 1257 static uint size_call_trampoline() { 1258 return 0; // no call trampolines on this platform 1259 } 1260 1261 // number of relocations needed by a call trampoline stub 1262 static uint reloc_call_trampoline() { 1263 return 0; // no call trampolines on this platform 1264 } 1265 }; 1266 1267 class HandlerImpl { 1268 1269 public: 1270 1271 static int emit_exception_handler(CodeBuffer &cbuf); 1272 static int emit_deopt_handler(CodeBuffer& cbuf); 1273 1274 static uint size_exception_handler() { 1275 // NativeCall instruction size is the same as NativeJump. 1276 // The exception handler starts out as a jump and can be patched to 1277 // a call by deoptimization. (4932387) 1278 // Note that this value is also credited (in output.cpp) to 1279 // the size of the code section. 1280 return NativeJump::instruction_size; 1281 } 1282 1283 #ifdef _LP64 1284 static uint size_deopt_handler() { 1285 // three 5 byte instructions plus one move for unreachable address. 1286 return 15+3; 1287 } 1288 #else 1289 static uint size_deopt_handler() { 1290 // NativeCall instruction size is the same as NativeJump.
1291 // The exception handler starts out as a jump and can be patched to 1292 // a call by deoptimization. (4932387) 1293 // Note that this value is also credited (in output.cpp) to 1294 // the size of the code section. 1295 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1296 } 1297 #endif 1298 }; 1299 1300 %} // end source_hpp 1301 1302 source %{ 1303 1304 #include "opto/addnode.hpp" 1305 1306 // Emit exception handler code. 1307 // Stuff framesize into a register and call a VM stub routine. 1308 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1309 1310 // Note that the code buffer's insts_mark is always relative to insts. 1311 // That's why we must use the macroassembler to generate a handler. 1312 MacroAssembler _masm(&cbuf); 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == NULL) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 MacroAssembler _masm(&cbuf); 1331 address base = __ start_a_stub(size_deopt_handler()); 1332 if (base == NULL) { 1333 ciEnv::current()->record_failure("CodeCache is full"); 1334 return 0; // CodeBuffer::expand failed 1335 } 1336 int offset = __ offset(); 1337 1338 #ifdef _LP64 1339 address the_pc = (address) __ pc(); 1340 Label next; 1341 // push "the_pc" on the stack without destroying any registers 1342 // as they all may be live. 1343 1344 // push address of "next" 1345 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1346 __ bind(next); 1347 // adjust it so it matches "the_pc" 1348 __ subptr(Address(rsp, 0), __ offset() - offset); 1349 #else 1350 InternalAddress here(__ pc()); 1351 __ pushptr(here.addr()); 1352 #endif 1353 1354 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1355 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1356 __ end_a_stub(); 1357 return offset; 1358 } 1359 1360 1361 //============================================================================= 1362 1363 // Float masks come from different places depending on platform.
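// Illustrative note (an editorial sketch, not taken from the stub code): the abs/neg
// instructs further down AND or XOR these masks against the value, so the per-lane
// constants are expected to be the usual IEEE-754 sign-bit patterns, matching the
// [0x...] operands shown in those instructs' format strings:
//   float_signmask()  -> 0x7FFFFFFF          (andps => abs float)
//   float_signflip()  -> 0x80000000          (xorps => neg float)
//   double_signmask() -> 0x7FFFFFFFFFFFFFFF  (andpd => abs double)
//   double_signflip() -> 0x8000000000000000  (xorpd => neg double)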
1364 #ifdef _LP64 1365 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1366 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1367 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1368 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1369 #else 1370 static address float_signmask() { return (address)float_signmask_pool; } 1371 static address float_signflip() { return (address)float_signflip_pool; } 1372 static address double_signmask() { return (address)double_signmask_pool; } 1373 static address double_signflip() { return (address)double_signflip_pool; } 1374 #endif 1375 1376 1377 const bool Matcher::match_rule_supported(int opcode) { 1378 if (!has_match_rule(opcode)) 1379 return false; 1380 1381 bool ret_value = true; 1382 switch (opcode) { 1383 case Op_PopCountI: 1384 case Op_PopCountL: 1385 if (!UsePopCountInstruction) 1386 ret_value = false; 1387 break; 1388 case Op_PopCountVI: 1389 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq()) 1390 ret_value = false; 1391 break; 1392 case Op_MulVI: 1393 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1394 ret_value = false; 1395 break; 1396 case Op_MulVL: 1397 case Op_MulReductionVL: 1398 if (VM_Version::supports_avx512dq() == false) 1399 ret_value = false; 1400 break; 1401 case Op_AddReductionVL: 1402 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1403 ret_value = false; 1404 break; 1405 case Op_AddReductionVI: 1406 if (UseSSE < 3) // requires at least SSE3 1407 ret_value = false; 1408 break; 1409 case Op_MulReductionVI: 1410 if (UseSSE < 4) // requires at least SSE4 1411 ret_value = false; 1412 break; 1413 case Op_AddReductionVF: 1414 case Op_AddReductionVD: 1415 case Op_MulReductionVF: 1416 case Op_MulReductionVD: 1417 if (UseSSE < 1) // requires at least SSE 1418 ret_value = false; 1419 break; 1420 case Op_SqrtVD: 1421 case Op_SqrtVF: 1422 if (UseAVX < 1) // enabled for AVX only 1423 ret_value = false; 1424 break; 1425 case Op_CompareAndSwapL: 1426 #ifdef _LP64 1427 case Op_CompareAndSwapP: 1428 #endif 1429 if (!VM_Version::supports_cx8()) 1430 ret_value = false; 1431 break; 1432 case Op_CMoveVF: 1433 case Op_CMoveVD: 1434 if (UseAVX < 1 || UseAVX > 2) 1435 ret_value = false; 1436 break; 1437 case Op_StrIndexOf: 1438 if (!UseSSE42Intrinsics) 1439 ret_value = false; 1440 break; 1441 case Op_StrIndexOfChar: 1442 if (!UseSSE42Intrinsics) 1443 ret_value = false; 1444 break; 1445 case Op_OnSpinWait: 1446 if (VM_Version::supports_on_spin_wait() == false) 1447 ret_value = false; 1448 break; 1449 case Op_MulAddVS2VI: 1450 if (UseSSE < 2) 1451 ret_value = false; 1452 break; 1453 } 1454 1455 return ret_value; // By default, match rules are supported. 1456 } 1457 1458 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1459 // Identify extra cases that we might want to provide match rules for, 1460 // e.g.
Op_* vector nodes and other intrinsics, while guarding with vlen 1461 bool ret_value = match_rule_supported(opcode); 1462 if (ret_value) { 1463 switch (opcode) { 1464 case Op_AddVB: 1465 case Op_SubVB: 1466 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1467 ret_value = false; 1468 break; 1469 case Op_URShiftVS: 1470 case Op_RShiftVS: 1471 case Op_LShiftVS: 1472 case Op_MulVS: 1473 case Op_AddVS: 1474 case Op_SubVS: 1475 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1476 ret_value = false; 1477 break; 1478 case Op_CMoveVF: 1479 if (vlen != 8) 1480 ret_value = false; 1481 break; 1482 case Op_CMoveVD: 1483 if (vlen != 4) 1484 ret_value = false; 1485 break; 1486 } 1487 } 1488 1489 return ret_value; // By default, match rules are supported. 1490 } 1491 1492 const bool Matcher::has_predicated_vectors(void) { 1493 bool ret_value = false; 1494 if (UseAVX > 2) { 1495 ret_value = VM_Version::supports_avx512vl(); 1496 } 1497 1498 return ret_value; 1499 } 1500 1501 const int Matcher::float_pressure(int default_pressure_threshold) { 1502 int float_pressure_threshold = default_pressure_threshold; 1503 #ifdef _LP64 1504 if (UseAVX > 2) { 1505 // Increase pressure threshold on machines with AVX3 which have 1506 // 2x more XMM registers. 1507 float_pressure_threshold = default_pressure_threshold * 2; 1508 } 1509 #endif 1510 return float_pressure_threshold; 1511 } 1512 1513 // Max vector size in bytes. 0 if not supported. 1514 const int Matcher::vector_width_in_bytes(BasicType bt) { 1515 assert(is_java_primitive(bt), "only primitive type vectors"); 1516 if (UseSSE < 2) return 0; 1517 // SSE2 supports 128bit vectors for all types. 1518 // AVX2 supports 256bit vectors for all types. 1519 // AVX512/EVEX supports 512bit vectors for all types. 1520 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1521 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1522 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1523 size = (UseAVX > 2) ? 64 : 32; 1524 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 1525 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 1526 // Use flag to limit vector size. 1527 size = MIN2(size,(int)MaxVectorSize); 1528 // Minimum 2 values in vector (or 4 for bytes). 1529 switch (bt) { 1530 case T_DOUBLE: 1531 case T_LONG: 1532 if (size < 16) return 0; 1533 break; 1534 case T_FLOAT: 1535 case T_INT: 1536 if (size < 8) return 0; 1537 break; 1538 case T_BOOLEAN: 1539 if (size < 4) return 0; 1540 break; 1541 case T_CHAR: 1542 if (size < 4) return 0; 1543 break; 1544 case T_BYTE: 1545 if (size < 4) return 0; 1546 break; 1547 case T_SHORT: 1548 if (size < 4) return 0; 1549 break; 1550 default: 1551 ShouldNotReachHere(); 1552 } 1553 return size; 1554 } 1555 1556 // Limits on vector size (number of elements) loaded into vector. 1557 const int Matcher::max_vector_size(const BasicType bt) { 1558 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1559 } 1560 const int Matcher::min_vector_size(const BasicType bt) { 1561 int max_size = max_vector_size(bt); 1562 // The minimum size that can be loaded into a vector is 4 bytes. 1563 int size = (type2aelembytes(bt) == 1) ?
4 : 2; 1564 return MIN2(size,max_size); 1565 } 1566 1567 // Vector ideal reg corresponding to specified size in bytes 1568 const uint Matcher::vector_ideal_reg(int size) { 1569 assert(MaxVectorSize >= size, ""); 1570 switch(size) { 1571 case 4: return Op_VecS; 1572 case 8: return Op_VecD; 1573 case 16: return Op_VecX; 1574 case 32: return Op_VecY; 1575 case 64: return Op_VecZ; 1576 } 1577 ShouldNotReachHere(); 1578 return 0; 1579 } 1580 1581 // Only lowest bits of xmm reg are used for vector shift count. 1582 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1583 return Op_VecS; 1584 } 1585 1586 // x86 supports misaligned vector stores/loads. 1587 const bool Matcher::misaligned_vectors_ok() { 1588 return true; 1589 } 1590 1591 // x86 AES instructions are compatible with SunJCE expanded 1592 // keys, hence we do not need to pass the original key to stubs 1593 const bool Matcher::pass_original_key_for_aes() { 1594 return false; 1595 } 1596 1597 1598 const bool Matcher::convi2l_type_required = true; 1599 1600 // Check for shift by small constant as well 1601 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1602 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1603 shift->in(2)->get_int() <= 3 && 1604 // Are there other uses besides address expressions? 1605 !matcher->is_visited(shift)) { 1606 address_visited.set(shift->_idx); // Flag as address_visited 1607 mstack.push(shift->in(2), Matcher::Visit); 1608 Node *conv = shift->in(1); 1609 #ifdef _LP64 1610 // Allow the Matcher to match the rule which bypasses the 1611 // ConvI2L operation for an array index on LP64 1612 // if the index value is positive. 1613 if (conv->Opcode() == Op_ConvI2L && 1614 conv->as_Type()->type()->is_long()->_lo >= 0 && 1615 // Are there other uses besides address expressions? 1616 !matcher->is_visited(conv)) { 1617 address_visited.set(conv->_idx); // Flag as address_visited 1618 mstack.push(conv->in(1), Matcher::Pre_Visit); 1619 } else 1620 #endif 1621 mstack.push(conv, Matcher::Pre_Visit); 1622 return true; 1623 } 1624 return false; 1625 } 1626 1627 // Should the Matcher clone shifts on addressing modes, expecting them 1628 // to be subsumed into complex addressing expressions, or compute them 1629 // into registers? 1630 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1631 Node *off = m->in(AddPNode::Offset); 1632 if (off->is_Con()) { 1633 address_visited.test_set(m->_idx); // Flag as address_visited 1634 Node *adr = m->in(AddPNode::Address); 1635 1636 // Intel can handle 2 adds in addressing mode 1637 // AtomicAdd is not an addressing expression. 1638 // Cheap to find it by looking for screwy base. 1639 if (adr->is_AddP() && 1640 !adr->in(AddPNode::Base)->is_top() && 1641 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 1642 // Are there other uses besides address expressions?
!is_visited(adr)) { 1644 address_visited.set(adr->_idx); // Flag as address_visited 1645 Node *shift = adr->in(AddPNode::Offset); 1646 if (!clone_shift(shift, this, mstack, address_visited)) { 1647 mstack.push(shift, Pre_Visit); 1648 } 1649 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1650 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1651 } else { 1652 mstack.push(adr, Pre_Visit); 1653 } 1654 1655 // Clone X+offset as it also folds into most addressing expressions 1656 mstack.push(off, Visit); 1657 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1658 return true; 1659 } else if (clone_shift(off, this, mstack, address_visited)) { 1660 address_visited.test_set(m->_idx); // Flag as address_visited 1661 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1662 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1663 return true; 1664 } 1665 return false; 1666 } 1667 1668 void Compile::reshape_address(AddPNode* addp) { 1669 } 1670 1671 // Helper methods for MachSpillCopyNode::implementation(). 1672 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1673 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1674 // In the 64-bit VM size calculation is very complex, so instructions 1675 // are emitted into a scratch buffer to determine their size. 1676 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1677 assert(ireg == Op_VecS || // 32bit vector 1678 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1679 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1680 "no non-adjacent vector moves" ); 1681 if (cbuf) { 1682 MacroAssembler _masm(cbuf); 1683 int offset = __ offset(); 1684 switch (ireg) { 1685 case Op_VecS: // copy whole register 1686 case Op_VecD: 1687 case Op_VecX: 1688 #ifndef _LP64 1689 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1690 #else 1691 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1692 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1693 } else { 1694 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1695 } 1696 #endif 1697 break; 1698 case Op_VecY: 1699 #ifndef _LP64 1700 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1701 #else 1702 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1703 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1704 } else { 1705 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1706 } 1707 #endif 1708 break; 1709 case Op_VecZ: 1710 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1711 break; 1712 default: 1713 ShouldNotReachHere(); 1714 } 1715 int size = __ offset() - offset; 1716 #ifdef ASSERT 1717 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
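// Rough byte-count sketch (editorial illustration; the usual reg-reg copy
// encodings, assuming no REX prefix is needed on the 32-bit paths where
// do_size applies):
//   movdqu  xmm,xmm : F3 0F 6F /r        -> 4 bytes (SSE)
//   vmovdqu xmm,xmm : C5 xx 6F /r        -> 4 bytes (2-byte VEX)
//   evmovdquq z,z   : 62 xx xx xx 6F /r  -> 6 bytes (4-byte EVEX prefix)
// This is why the assert below expects size == 4 and the size-only path at
// the end returns (UseAVX > 2) ? 6 : 4.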
1718 assert(!do_size || size == 4, "incorrect size calculation"); 1719 #endif 1720 return size; 1721 #ifndef PRODUCT 1722 } else if (!do_size) { 1723 switch (ireg) { 1724 case Op_VecS: 1725 case Op_VecD: 1726 case Op_VecX: 1727 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1728 break; 1729 case Op_VecY: 1730 case Op_VecZ: 1731 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1732 break; 1733 default: 1734 ShouldNotReachHere(); 1735 } 1736 #endif 1737 } 1738 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1739 return (UseAVX > 2) ? 6 : 4; 1740 } 1741 1742 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1743 int stack_offset, int reg, uint ireg, outputStream* st) { 1744 // In the 64-bit VM size calculation is very complex, so instructions 1745 // are emitted into a scratch buffer to determine their size. 1746 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1747 if (cbuf) { 1748 MacroAssembler _masm(cbuf); 1749 int offset = __ offset(); 1750 if (is_load) { 1751 switch (ireg) { 1752 case Op_VecS: 1753 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1754 break; 1755 case Op_VecD: 1756 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1757 break; 1758 case Op_VecX: 1759 #ifndef _LP64 1760 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1761 #else 1762 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1763 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1764 } else { 1765 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1766 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1767 } 1768 #endif 1769 break; 1770 case Op_VecY: 1771 #ifndef _LP64 1772 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1773 #else 1774 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1775 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1776 } else { 1777 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1778 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 1779 } 1780 #endif 1781 break; 1782 case Op_VecZ: 1783 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1784 break; 1785 default: 1786 ShouldNotReachHere(); 1787 } 1788 } else { // store 1789 switch (ireg) { 1790 case Op_VecS: 1791 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1792 break; 1793 case Op_VecD: 1794 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1795 break; 1796 case Op_VecX: 1797 #ifndef _LP64 1798 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1799 #else 1800 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1801 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1802 } 1803 else { 1804 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1805 } 1806 #endif 1807 break; 1808 case Op_VecY: 1809 #ifndef _LP64 1810 __
vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1811 #else 1812 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1813 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1814 } 1815 else { 1816 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 1817 } 1818 #endif 1819 break; 1820 case Op_VecZ: 1821 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1822 break; 1823 default: 1824 ShouldNotReachHere(); 1825 } 1826 } 1827 int size = __ offset() - offset; 1828 #ifdef ASSERT 1829 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1830 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1831 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1832 #endif 1833 return size; 1834 #ifndef PRODUCT 1835 } else if (!do_size) { 1836 if (is_load) { 1837 switch (ireg) { 1838 case Op_VecS: 1839 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1840 break; 1841 case Op_VecD: 1842 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1843 break; 1844 case Op_VecX: 1845 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1846 break; 1847 case Op_VecY: 1848 case Op_VecZ: 1849 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1850 break; 1851 default: 1852 ShouldNotReachHere(); 1853 } 1854 } else { // store 1855 switch (ireg) { 1856 case Op_VecS: 1857 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1858 break; 1859 case Op_VecD: 1860 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1861 break; 1862 case Op_VecX: 1863 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1864 break; 1865 case Op_VecY: 1866 case Op_VecZ: 1867 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1868 break; 1869 default: 1870 ShouldNotReachHere(); 1871 } 1872 } 1873 #endif 1874 } 1875 bool is_single_byte = false; 1876 int vec_len = 0; 1877 if ((UseAVX > 2) && (stack_offset != 0)) { 1878 int tuple_type = Assembler::EVEX_FVM; 1879 int input_size = Assembler::EVEX_32bit; 1880 switch (ireg) { 1881 case Op_VecS: 1882 tuple_type = Assembler::EVEX_T1S; 1883 break; 1884 case Op_VecD: 1885 tuple_type = Assembler::EVEX_T1S; 1886 input_size = Assembler::EVEX_64bit; 1887 break; 1888 case Op_VecX: 1889 break; 1890 case Op_VecY: 1891 vec_len = 1; 1892 break; 1893 case Op_VecZ: 1894 vec_len = 2; 1895 break; 1896 } 1897 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 1898 } 1899 int offset_size = 0; 1900 int size = 5; 1901 if (UseAVX > 2 ) { 1902 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 1903 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1904 size += 2; // Need an additional two bytes for EVEX encoding 1905 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 1906 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1907 } else { 1908 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1909 size += 2; // Need an additional two bytes for EVEX encoding 1910 } 1911 } else { 1912 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ?
1 : 4); 1913 } 1914 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1915 return size+offset_size; 1916 } 1917 1918 static inline jint replicate4_imm(int con, int width) { 1919 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1920 assert(width == 1 || width == 2, "only byte or short types here"); 1921 int bit_width = width * 8; 1922 jint val = con; 1923 val &= (1 << bit_width) - 1; // mask off sign bits 1924 while(bit_width < 32) { 1925 val |= (val << bit_width); 1926 bit_width <<= 1; 1927 } 1928 return val; 1929 } 1930 1931 static inline jlong replicate8_imm(int con, int width) { 1932 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 1933 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1934 int bit_width = width * 8; 1935 jlong val = con; 1936 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1937 while(bit_width < 64) { 1938 val |= (val << bit_width); 1939 bit_width <<= 1; 1940 } 1941 return val; 1942 } 1943 1944 #ifndef PRODUCT 1945 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1946 st->print("nop \t# %d bytes pad for loops and calls", _count); 1947 } 1948 #endif 1949 1950 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1951 MacroAssembler _masm(&cbuf); 1952 __ nop(_count); 1953 } 1954 1955 uint MachNopNode::size(PhaseRegAlloc*) const { 1956 return _count; 1957 } 1958 1959 #ifndef PRODUCT 1960 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1961 st->print("# breakpoint"); 1962 } 1963 #endif 1964 1965 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1966 MacroAssembler _masm(&cbuf); 1967 __ int3(); 1968 } 1969 1970 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1971 return MachNode::size(ra_); 1972 } 1973 1974 %} 1975 1976 encode %{ 1977 1978 enc_class call_epilog %{ 1979 if (VerifyStackAtCalls) { 1980 // Check that stack depth is unchanged: find majik cookie on stack 1981 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1982 MacroAssembler _masm(&cbuf); 1983 Label L; 1984 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1985 __ jccb(Assembler::equal, L); 1986 // Die if stack mismatch 1987 __ int3(); 1988 __ bind(L); 1989 } 1990 %} 1991 1992 %} 1993 1994 1995 //----------OPERANDS----------------------------------------------------------- 1996 // Operand definitions must precede instruction definitions for correct parsing 1997 // in the ADLC because operands constitute user defined types which are used in 1998 // instruction definitions. 
1999 2000 operand vecZ() %{ 2001 constraint(ALLOC_IN_RC(vectorz_reg)); 2002 match(VecZ); 2003 2004 format %{ %} 2005 interface(REG_INTER); 2006 %} 2007 2008 operand legVecZ() %{ 2009 constraint(ALLOC_IN_RC(vectorz_reg_vl)); 2010 match(VecZ); 2011 2012 format %{ %} 2013 interface(REG_INTER); 2014 %} 2015 2016 // Comparison Code for FP conditional move 2017 operand cmpOp_vcmppd() %{ 2018 match(Bool); 2019 2020 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2021 n->as_Bool()->_test._test != BoolTest::no_overflow); 2022 format %{ "" %} 2023 interface(COND_INTER) %{ 2024 equal (0x0, "eq"); 2025 less (0x1, "lt"); 2026 less_equal (0x2, "le"); 2027 not_equal (0xC, "ne"); 2028 greater_equal(0xD, "ge"); 2029 greater (0xE, "gt"); 2030 // TODO: adlc cannot compile this operand without the next two lines; it fails with: 2031 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2032 // equal' for overflow. 2033 overflow (0x20, "o"); // not really supported by the instruction 2034 no_overflow (0x21, "no"); // not really supported by the instruction 2035 %} 2036 %} 2037 2038 2039 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2040 2041 // ============================================================================ 2042 2043 instruct ShouldNotReachHere() %{ 2044 match(Halt); 2045 format %{ "ud2\t# ShouldNotReachHere" %} 2046 ins_encode %{ 2047 __ ud2(); 2048 %} 2049 ins_pipe(pipe_slow); 2050 %} 2051 2052 // =================================EVEX special=============================== 2053 2054 instruct setMask(rRegI dst, rRegI src) %{ 2055 predicate(Matcher::has_predicated_vectors()); 2056 match(Set dst (SetVectMaskI src)); 2057 effect(TEMP dst); 2058 format %{ "setvectmask $dst, $src" %} 2059 ins_encode %{ 2060 __ setvectmask($dst$$Register, $src$$Register); 2061 %} 2062 ins_pipe(pipe_slow); 2063 %} 2064 2065 // ============================================================================ 2066 2067 instruct addF_reg(regF dst, regF src) %{ 2068 predicate((UseSSE>=1) && (UseAVX == 0)); 2069 match(Set dst (AddF dst src)); 2070 2071 format %{ "addss $dst, $src" %} 2072 ins_cost(150); 2073 ins_encode %{ 2074 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2075 %} 2076 ins_pipe(pipe_slow); 2077 %} 2078 2079 instruct addF_mem(regF dst, memory src) %{ 2080 predicate((UseSSE>=1) && (UseAVX == 0)); 2081 match(Set dst (AddF dst (LoadF src))); 2082 2083 format %{ "addss $dst, $src" %} 2084 ins_cost(150); 2085 ins_encode %{ 2086 __ addss($dst$$XMMRegister, $src$$Address); 2087 %} 2088 ins_pipe(pipe_slow); 2089 %} 2090 2091 instruct addF_imm(regF dst, immF con) %{ 2092 predicate((UseSSE>=1) && (UseAVX == 0)); 2093 match(Set dst (AddF dst con)); 2094 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2095 ins_cost(150); 2096 ins_encode %{ 2097 __ addss($dst$$XMMRegister, $constantaddress($con)); 2098 %} 2099 ins_pipe(pipe_slow); 2100 %} 2101 2102 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2103 predicate(UseAVX > 0); 2104 match(Set dst (AddF src1 src2)); 2105 2106 format %{ "vaddss $dst, $src1, $src2" %} 2107 ins_cost(150); 2108 ins_encode %{ 2109 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2110 %} 2111 ins_pipe(pipe_slow); 2112 %} 2113 2114 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2115 predicate(UseAVX > 0); 2116 match(Set dst (AddF src1 (LoadF src2))); 2117 2118 format %{ "vaddss $dst, $src1, $src2" %} 2119 ins_cost(150); 2120 ins_encode %{ 2121 __
vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2122 %} 2123 ins_pipe(pipe_slow); 2124 %} 2125 2126 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2127 predicate(UseAVX > 0); 2128 match(Set dst (AddF src con)); 2129 2130 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2131 ins_cost(150); 2132 ins_encode %{ 2133 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2134 %} 2135 ins_pipe(pipe_slow); 2136 %} 2137 2138 instruct addD_reg(regD dst, regD src) %{ 2139 predicate((UseSSE>=2) && (UseAVX == 0)); 2140 match(Set dst (AddD dst src)); 2141 2142 format %{ "addsd $dst, $src" %} 2143 ins_cost(150); 2144 ins_encode %{ 2145 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2146 %} 2147 ins_pipe(pipe_slow); 2148 %} 2149 2150 instruct addD_mem(regD dst, memory src) %{ 2151 predicate((UseSSE>=2) && (UseAVX == 0)); 2152 match(Set dst (AddD dst (LoadD src))); 2153 2154 format %{ "addsd $dst, $src" %} 2155 ins_cost(150); 2156 ins_encode %{ 2157 __ addsd($dst$$XMMRegister, $src$$Address); 2158 %} 2159 ins_pipe(pipe_slow); 2160 %} 2161 2162 instruct addD_imm(regD dst, immD con) %{ 2163 predicate((UseSSE>=2) && (UseAVX == 0)); 2164 match(Set dst (AddD dst con)); 2165 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2166 ins_cost(150); 2167 ins_encode %{ 2168 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2169 %} 2170 ins_pipe(pipe_slow); 2171 %} 2172 2173 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2174 predicate(UseAVX > 0); 2175 match(Set dst (AddD src1 src2)); 2176 2177 format %{ "vaddsd $dst, $src1, $src2" %} 2178 ins_cost(150); 2179 ins_encode %{ 2180 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2181 %} 2182 ins_pipe(pipe_slow); 2183 %} 2184 2185 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2186 predicate(UseAVX > 0); 2187 match(Set dst (AddD src1 (LoadD src2))); 2188 2189 format %{ "vaddsd $dst, $src1, $src2" %} 2190 ins_cost(150); 2191 ins_encode %{ 2192 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2193 %} 2194 ins_pipe(pipe_slow); 2195 %} 2196 2197 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2198 predicate(UseAVX > 0); 2199 match(Set dst (AddD src con)); 2200 2201 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2202 ins_cost(150); 2203 ins_encode %{ 2204 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2205 %} 2206 ins_pipe(pipe_slow); 2207 %} 2208 2209 instruct subF_reg(regF dst, regF src) %{ 2210 predicate((UseSSE>=1) && (UseAVX == 0)); 2211 match(Set dst (SubF dst src)); 2212 2213 format %{ "subss $dst, $src" %} 2214 ins_cost(150); 2215 ins_encode %{ 2216 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2217 %} 2218 ins_pipe(pipe_slow); 2219 %} 2220 2221 instruct subF_mem(regF dst, memory src) %{ 2222 predicate((UseSSE>=1) && (UseAVX == 0)); 2223 match(Set dst (SubF dst (LoadF src))); 2224 2225 format %{ "subss $dst, $src" %} 2226 ins_cost(150); 2227 ins_encode %{ 2228 __ subss($dst$$XMMRegister, $src$$Address); 2229 %} 2230 ins_pipe(pipe_slow); 2231 %} 2232 2233 instruct subF_imm(regF dst, immF con) %{ 2234 predicate((UseSSE>=1) && (UseAVX == 0)); 2235 match(Set dst (SubF dst con)); 2236 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2237 ins_cost(150); 2238 ins_encode %{ 2239 __ subss($dst$$XMMRegister, $constantaddress($con)); 2240 %} 2241 ins_pipe(pipe_slow); 2242 %} 2243 
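// Note on the reg/reg pairs in this section (illustrative summary, not part of
// the original description): the UseAVX == 0 rules use the destructive
// two-operand SSE forms, e.g.
//   subss xmm0, xmm1          ; xmm0 = xmm0 - xmm1, dst tied to src1
// while the UseAVX > 0 rules use the non-destructive three-operand VEX
// forms, e.g.
//   vsubss xmm0, xmm1, xmm2   ; xmm0 = xmm1 - xmm2
// which is why the SSE rules match (Set dst (SubF dst src)) but the AVX rules
// can match (Set dst (SubF src1 src2)) with an independent dst.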
2244 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2245 predicate(UseAVX > 0); 2246 match(Set dst (SubF src1 src2)); 2247 2248 format %{ "vsubss $dst, $src1, $src2" %} 2249 ins_cost(150); 2250 ins_encode %{ 2251 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2252 %} 2253 ins_pipe(pipe_slow); 2254 %} 2255 2256 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2257 predicate(UseAVX > 0); 2258 match(Set dst (SubF src1 (LoadF src2))); 2259 2260 format %{ "vsubss $dst, $src1, $src2" %} 2261 ins_cost(150); 2262 ins_encode %{ 2263 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2264 %} 2265 ins_pipe(pipe_slow); 2266 %} 2267 2268 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2269 predicate(UseAVX > 0); 2270 match(Set dst (SubF src con)); 2271 2272 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2273 ins_cost(150); 2274 ins_encode %{ 2275 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2276 %} 2277 ins_pipe(pipe_slow); 2278 %} 2279 2280 instruct subD_reg(regD dst, regD src) %{ 2281 predicate((UseSSE>=2) && (UseAVX == 0)); 2282 match(Set dst (SubD dst src)); 2283 2284 format %{ "subsd $dst, $src" %} 2285 ins_cost(150); 2286 ins_encode %{ 2287 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2288 %} 2289 ins_pipe(pipe_slow); 2290 %} 2291 2292 instruct subD_mem(regD dst, memory src) %{ 2293 predicate((UseSSE>=2) && (UseAVX == 0)); 2294 match(Set dst (SubD dst (LoadD src))); 2295 2296 format %{ "subsd $dst, $src" %} 2297 ins_cost(150); 2298 ins_encode %{ 2299 __ subsd($dst$$XMMRegister, $src$$Address); 2300 %} 2301 ins_pipe(pipe_slow); 2302 %} 2303 2304 instruct subD_imm(regD dst, immD con) %{ 2305 predicate((UseSSE>=2) && (UseAVX == 0)); 2306 match(Set dst (SubD dst con)); 2307 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2308 ins_cost(150); 2309 ins_encode %{ 2310 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2311 %} 2312 ins_pipe(pipe_slow); 2313 %} 2314 2315 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2316 predicate(UseAVX > 0); 2317 match(Set dst (SubD src1 src2)); 2318 2319 format %{ "vsubsd $dst, $src1, $src2" %} 2320 ins_cost(150); 2321 ins_encode %{ 2322 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2323 %} 2324 ins_pipe(pipe_slow); 2325 %} 2326 2327 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2328 predicate(UseAVX > 0); 2329 match(Set dst (SubD src1 (LoadD src2))); 2330 2331 format %{ "vsubsd $dst, $src1, $src2" %} 2332 ins_cost(150); 2333 ins_encode %{ 2334 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2335 %} 2336 ins_pipe(pipe_slow); 2337 %} 2338 2339 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2340 predicate(UseAVX > 0); 2341 match(Set dst (SubD src con)); 2342 2343 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2344 ins_cost(150); 2345 ins_encode %{ 2346 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2347 %} 2348 ins_pipe(pipe_slow); 2349 %} 2350 2351 instruct mulF_reg(regF dst, regF src) %{ 2352 predicate((UseSSE>=1) && (UseAVX == 0)); 2353 match(Set dst (MulF dst src)); 2354 2355 format %{ "mulss $dst, $src" %} 2356 ins_cost(150); 2357 ins_encode %{ 2358 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2359 %} 2360 ins_pipe(pipe_slow); 2361 %} 2362 2363 instruct mulF_mem(regF dst, memory src) %{ 2364 predicate((UseSSE>=1) && (UseAVX == 0)); 2365 
match(Set dst (MulF dst (LoadF src))); 2366 2367 format %{ "mulss $dst, $src" %} 2368 ins_cost(150); 2369 ins_encode %{ 2370 __ mulss($dst$$XMMRegister, $src$$Address); 2371 %} 2372 ins_pipe(pipe_slow); 2373 %} 2374 2375 instruct mulF_imm(regF dst, immF con) %{ 2376 predicate((UseSSE>=1) && (UseAVX == 0)); 2377 match(Set dst (MulF dst con)); 2378 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2379 ins_cost(150); 2380 ins_encode %{ 2381 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2382 %} 2383 ins_pipe(pipe_slow); 2384 %} 2385 2386 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2387 predicate(UseAVX > 0); 2388 match(Set dst (MulF src1 src2)); 2389 2390 format %{ "vmulss $dst, $src1, $src2" %} 2391 ins_cost(150); 2392 ins_encode %{ 2393 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2394 %} 2395 ins_pipe(pipe_slow); 2396 %} 2397 2398 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2399 predicate(UseAVX > 0); 2400 match(Set dst (MulF src1 (LoadF src2))); 2401 2402 format %{ "vmulss $dst, $src1, $src2" %} 2403 ins_cost(150); 2404 ins_encode %{ 2405 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2406 %} 2407 ins_pipe(pipe_slow); 2408 %} 2409 2410 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2411 predicate(UseAVX > 0); 2412 match(Set dst (MulF src con)); 2413 2414 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2415 ins_cost(150); 2416 ins_encode %{ 2417 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2418 %} 2419 ins_pipe(pipe_slow); 2420 %} 2421 2422 instruct mulD_reg(regD dst, regD src) %{ 2423 predicate((UseSSE>=2) && (UseAVX == 0)); 2424 match(Set dst (MulD dst src)); 2425 2426 format %{ "mulsd $dst, $src" %} 2427 ins_cost(150); 2428 ins_encode %{ 2429 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2430 %} 2431 ins_pipe(pipe_slow); 2432 %} 2433 2434 instruct mulD_mem(regD dst, memory src) %{ 2435 predicate((UseSSE>=2) && (UseAVX == 0)); 2436 match(Set dst (MulD dst (LoadD src))); 2437 2438 format %{ "mulsd $dst, $src" %} 2439 ins_cost(150); 2440 ins_encode %{ 2441 __ mulsd($dst$$XMMRegister, $src$$Address); 2442 %} 2443 ins_pipe(pipe_slow); 2444 %} 2445 2446 instruct mulD_imm(regD dst, immD con) %{ 2447 predicate((UseSSE>=2) && (UseAVX == 0)); 2448 match(Set dst (MulD dst con)); 2449 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2450 ins_cost(150); 2451 ins_encode %{ 2452 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2453 %} 2454 ins_pipe(pipe_slow); 2455 %} 2456 2457 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2458 predicate(UseAVX > 0); 2459 match(Set dst (MulD src1 src2)); 2460 2461 format %{ "vmulsd $dst, $src1, $src2" %} 2462 ins_cost(150); 2463 ins_encode %{ 2464 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2465 %} 2466 ins_pipe(pipe_slow); 2467 %} 2468 2469 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2470 predicate(UseAVX > 0); 2471 match(Set dst (MulD src1 (LoadD src2))); 2472 2473 format %{ "vmulsd $dst, $src1, $src2" %} 2474 ins_cost(150); 2475 ins_encode %{ 2476 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2477 %} 2478 ins_pipe(pipe_slow); 2479 %} 2480 2481 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2482 predicate(UseAVX > 0); 2483 match(Set dst (MulD src con)); 2484 2485 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" 
%} 2486 ins_cost(150); 2487 ins_encode %{ 2488 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2489 %} 2490 ins_pipe(pipe_slow); 2491 %} 2492 2493 instruct divF_reg(regF dst, regF src) %{ 2494 predicate((UseSSE>=1) && (UseAVX == 0)); 2495 match(Set dst (DivF dst src)); 2496 2497 format %{ "divss $dst, $src" %} 2498 ins_cost(150); 2499 ins_encode %{ 2500 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2501 %} 2502 ins_pipe(pipe_slow); 2503 %} 2504 2505 instruct divF_mem(regF dst, memory src) %{ 2506 predicate((UseSSE>=1) && (UseAVX == 0)); 2507 match(Set dst (DivF dst (LoadF src))); 2508 2509 format %{ "divss $dst, $src" %} 2510 ins_cost(150); 2511 ins_encode %{ 2512 __ divss($dst$$XMMRegister, $src$$Address); 2513 %} 2514 ins_pipe(pipe_slow); 2515 %} 2516 2517 instruct divF_imm(regF dst, immF con) %{ 2518 predicate((UseSSE>=1) && (UseAVX == 0)); 2519 match(Set dst (DivF dst con)); 2520 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2521 ins_cost(150); 2522 ins_encode %{ 2523 __ divss($dst$$XMMRegister, $constantaddress($con)); 2524 %} 2525 ins_pipe(pipe_slow); 2526 %} 2527 2528 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2529 predicate(UseAVX > 0); 2530 match(Set dst (DivF src1 src2)); 2531 2532 format %{ "vdivss $dst, $src1, $src2" %} 2533 ins_cost(150); 2534 ins_encode %{ 2535 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2536 %} 2537 ins_pipe(pipe_slow); 2538 %} 2539 2540 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2541 predicate(UseAVX > 0); 2542 match(Set dst (DivF src1 (LoadF src2))); 2543 2544 format %{ "vdivss $dst, $src1, $src2" %} 2545 ins_cost(150); 2546 ins_encode %{ 2547 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2548 %} 2549 ins_pipe(pipe_slow); 2550 %} 2551 2552 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2553 predicate(UseAVX > 0); 2554 match(Set dst (DivF src con)); 2555 2556 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2557 ins_cost(150); 2558 ins_encode %{ 2559 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2560 %} 2561 ins_pipe(pipe_slow); 2562 %} 2563 2564 instruct divD_reg(regD dst, regD src) %{ 2565 predicate((UseSSE>=2) && (UseAVX == 0)); 2566 match(Set dst (DivD dst src)); 2567 2568 format %{ "divsd $dst, $src" %} 2569 ins_cost(150); 2570 ins_encode %{ 2571 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2572 %} 2573 ins_pipe(pipe_slow); 2574 %} 2575 2576 instruct divD_mem(regD dst, memory src) %{ 2577 predicate((UseSSE>=2) && (UseAVX == 0)); 2578 match(Set dst (DivD dst (LoadD src))); 2579 2580 format %{ "divsd $dst, $src" %} 2581 ins_cost(150); 2582 ins_encode %{ 2583 __ divsd($dst$$XMMRegister, $src$$Address); 2584 %} 2585 ins_pipe(pipe_slow); 2586 %} 2587 2588 instruct divD_imm(regD dst, immD con) %{ 2589 predicate((UseSSE>=2) && (UseAVX == 0)); 2590 match(Set dst (DivD dst con)); 2591 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2592 ins_cost(150); 2593 ins_encode %{ 2594 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2595 %} 2596 ins_pipe(pipe_slow); 2597 %} 2598 2599 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2600 predicate(UseAVX > 0); 2601 match(Set dst (DivD src1 src2)); 2602 2603 format %{ "vdivsd $dst, $src1, $src2" %} 2604 ins_cost(150); 2605 ins_encode %{ 2606 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2607 %} 2608 ins_pipe(pipe_slow); 2609 %} 
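// Selection example (an editorial sketch; register names are placeholders):
// for Java source like
//   double r = a / b[i];
// the ideal subtree is (DivD a (LoadD b[i])), so the reg-mem rule below folds
// the load into the divide and emits a single
//   vdivsd xmm_dst, xmm_a, [addr_of_b_i]
// while a plain (DivD a b) selects the reg-reg rule above.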
2610 2611 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2612 predicate(UseAVX > 0); 2613 match(Set dst (DivD src1 (LoadD src2))); 2614 2615 format %{ "vdivsd $dst, $src1, $src2" %} 2616 ins_cost(150); 2617 ins_encode %{ 2618 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2619 %} 2620 ins_pipe(pipe_slow); 2621 %} 2622 2623 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2624 predicate(UseAVX > 0); 2625 match(Set dst (DivD src con)); 2626 2627 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2628 ins_cost(150); 2629 ins_encode %{ 2630 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2631 %} 2632 ins_pipe(pipe_slow); 2633 %} 2634 2635 instruct absF_reg(regF dst) %{ 2636 predicate((UseSSE>=1) && (UseAVX == 0)); 2637 match(Set dst (AbsF dst)); 2638 ins_cost(150); 2639 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 2640 ins_encode %{ 2641 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2642 %} 2643 ins_pipe(pipe_slow); 2644 %} 2645 2646 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 2647 predicate(UseAVX > 0); 2648 match(Set dst (AbsF src)); 2649 ins_cost(150); 2650 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2651 ins_encode %{ 2652 int vector_len = 0; 2653 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2654 ExternalAddress(float_signmask()), vector_len); 2655 %} 2656 ins_pipe(pipe_slow); 2657 %} 2658 2659 instruct absD_reg(regD dst) %{ 2660 predicate((UseSSE>=2) && (UseAVX == 0)); 2661 match(Set dst (AbsD dst)); 2662 ins_cost(150); 2663 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2664 "# abs double by sign masking" %} 2665 ins_encode %{ 2666 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2667 %} 2668 ins_pipe(pipe_slow); 2669 %} 2670 2671 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 2672 predicate(UseAVX > 0); 2673 match(Set dst (AbsD src)); 2674 ins_cost(150); 2675 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2676 "# abs double by sign masking" %} 2677 ins_encode %{ 2678 int vector_len = 0; 2679 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2680 ExternalAddress(double_signmask()), vector_len); 2681 %} 2682 ins_pipe(pipe_slow); 2683 %} 2684 2685 instruct negF_reg(regF dst) %{ 2686 predicate((UseSSE>=1) && (UseAVX == 0)); 2687 match(Set dst (NegF dst)); 2688 ins_cost(150); 2689 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2690 ins_encode %{ 2691 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2692 %} 2693 ins_pipe(pipe_slow); 2694 %} 2695 2696 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 2697 predicate(UseAVX > 0); 2698 match(Set dst (NegF src)); 2699 ins_cost(150); 2700 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2701 ins_encode %{ 2702 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2703 ExternalAddress(float_signflip())); 2704 %} 2705 ins_pipe(pipe_slow); 2706 %} 2707 2708 instruct negD_reg(regD dst) %{ 2709 predicate((UseSSE>=2) && (UseAVX == 0)); 2710 match(Set dst (NegD dst)); 2711 ins_cost(150); 2712 format %{ "xorpd $dst, [0x8000000000000000]\t" 2713 "# neg double by sign flipping" %} 2714 ins_encode %{ 2715 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2716 %} 2717 ins_pipe(pipe_slow); 2718 %} 2719 2720 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 2721 predicate(UseAVX > 0); 2722 match(Set dst (NegD src)); 2723 ins_cost(150); 2724 format %{ "vnegatesd $dst, $src, 
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
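// The FmaF/FmaD patterns compute c = a * b + c with a single rounding step
// (fused multiply-add). The fused result can differ in the last bit from a
// separate multiply/add pair, which is why these patterns are guarded by
// UseFMA and matched only for Fma nodes requested explicitly (Math.fma)
// rather than applied as a blanket strength reduction of Mul+Add.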
// ====================VECTOR INSTRUCTIONS=====================================


// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (4 bytes long)
instruct MoveVecS2Leg(legVecS dst, vecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (4 bytes long)
instruct MoveLeg2VecS(vecS dst, legVecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (8 bytes long)
instruct MoveVecD2Leg(legVecD dst, vecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (8 bytes long)
instruct MoveLeg2VecD(vecD dst, legVecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (16 bytes long)
instruct MoveVecX2Leg(legVecX dst, vecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (16 bytes long)
instruct MoveLeg2VecX(vecX dst, legVecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
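// The Move*2Leg / MoveLeg2Vec* pairs above shuttle values between the full
// EVEX register file and the "legacy" register classes (XMM0-XMM15). The
// encoder check is the interesting part: on AVX-512 machines without the VL
// extension, a 128/256-bit move cannot be EVEX-encoded, yet one of the
// operands may live in XMM16-XMM31, which only EVEX can name. The fallback
// is to copy the entire 512-bit register (evmovdquq with vector_len = 2);
// the upper lanes are don't-care bits for these moves, so the wider copy is
// safe.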
// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (32 bytes long)
instruct MoveVecY2Leg(legVecY dst, vecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (32 bytes long)
instruct MoveLeg2VecY(vecY dst, legVecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
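// Throughout this file the vector_len argument passed to the assembler
// selects the encoded operand width, not an element count:
//   vector_len = 0  ->  128-bit (XMM) operation
//   vector_len = 1  ->  256-bit (YMM) operation
//   vector_len = 2  ->  512-bit (ZMM) operation
// Hence the 64-byte loads and moves above hard-code vector_len = 2, while
// the 32-byte zeroing patterns later in the file pass vector_len = 1.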
// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE========================================
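// In this legacy section a scalar is broadcast with a shuffle cascade that
// doubles the filled width at each step. For Repl16B the emitted sequence
// is roughly:
//   movd       xmm0, eax        // 4 bytes of the scalar land in the vector
//   punpcklbw  xmm0, xmm0       // duplicate each byte into a 16-bit pair
//   pshuflw    xmm0, xmm0, 0x00 // copy word 0 across the low 64 bits
//   punpcklqdq xmm0, xmm0       // copy the low quadword to the high half
// The 256/512-bit variants then stack vinserti128_high / vinserti64x4 on
// top to mirror the 128-bit result into the upper lanes.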
replicate16B" %} 3141 ins_encode %{ 3142 __ movdl($dst$$XMMRegister, $src$$Register); 3143 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3144 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3145 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3146 %} 3147 ins_pipe( pipe_slow ); 3148 %} 3149 3150 instruct Repl16B_mem(vecX dst, memory mem) %{ 3151 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3152 match(Set dst (ReplicateB (LoadB mem))); 3153 format %{ "punpcklbw $dst,$mem\n\t" 3154 "pshuflw $dst,$dst,0x00\n\t" 3155 "punpcklqdq $dst,$dst\t! replicate16B" %} 3156 ins_encode %{ 3157 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3158 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3159 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3160 %} 3161 ins_pipe( pipe_slow ); 3162 %} 3163 3164 instruct Repl32B(vecY dst, rRegI src) %{ 3165 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3166 match(Set dst (ReplicateB src)); 3167 format %{ "movd $dst,$src\n\t" 3168 "punpcklbw $dst,$dst\n\t" 3169 "pshuflw $dst,$dst,0x00\n\t" 3170 "punpcklqdq $dst,$dst\n\t" 3171 "vinserti128_high $dst,$dst\t! replicate32B" %} 3172 ins_encode %{ 3173 __ movdl($dst$$XMMRegister, $src$$Register); 3174 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3175 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3176 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3177 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3178 %} 3179 ins_pipe( pipe_slow ); 3180 %} 3181 3182 instruct Repl32B_mem(vecY dst, memory mem) %{ 3183 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3184 match(Set dst (ReplicateB (LoadB mem))); 3185 format %{ "punpcklbw $dst,$mem\n\t" 3186 "pshuflw $dst,$dst,0x00\n\t" 3187 "punpcklqdq $dst,$dst\n\t" 3188 "vinserti128_high $dst,$dst\t! replicate32B" %} 3189 ins_encode %{ 3190 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3191 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3192 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3193 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3194 %} 3195 ins_pipe( pipe_slow ); 3196 %} 3197 3198 instruct Repl64B(legVecZ dst, rRegI src) %{ 3199 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3200 match(Set dst (ReplicateB src)); 3201 format %{ "movd $dst,$src\n\t" 3202 "punpcklbw $dst,$dst\n\t" 3203 "pshuflw $dst,$dst,0x00\n\t" 3204 "punpcklqdq $dst,$dst\n\t" 3205 "vinserti128_high $dst,$dst\t" 3206 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %} 3207 ins_encode %{ 3208 __ movdl($dst$$XMMRegister, $src$$Register); 3209 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3210 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3211 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3212 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3213 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3214 %} 3215 ins_pipe( pipe_slow ); 3216 %} 3217 3218 instruct Repl64B_mem(legVecZ dst, memory mem) %{ 3219 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3220 match(Set dst (ReplicateB (LoadB mem))); 3221 format %{ "punpcklbw $dst,$mem\n\t" 3222 "pshuflw $dst,$dst,0x00\n\t" 3223 "punpcklqdq $dst,$dst\n\t" 3224 "vinserti128_high $dst,$dst\t" 3225 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate64B" %} 3226 ins_encode %{ 3227 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3228 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3229 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3230 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3231 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3232 %} 3233 ins_pipe( pipe_slow ); 3234 %} 3235 3236 instruct Repl16B_imm(vecX dst, immI con) %{ 3237 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3238 match(Set dst (ReplicateB con)); 3239 format %{ "movq $dst,[$constantaddress]\n\t" 3240 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3241 ins_encode %{ 3242 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3243 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3244 %} 3245 ins_pipe( pipe_slow ); 3246 %} 3247 3248 instruct Repl32B_imm(vecY dst, immI con) %{ 3249 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3250 match(Set dst (ReplicateB con)); 3251 format %{ "movq $dst,[$constantaddress]\n\t" 3252 "punpcklqdq $dst,$dst\n\t" 3253 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3254 ins_encode %{ 3255 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3256 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3257 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3258 %} 3259 ins_pipe( pipe_slow ); 3260 %} 3261 3262 instruct Repl64B_imm(legVecZ dst, immI con) %{ 3263 predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw()); 3264 match(Set dst (ReplicateB con)); 3265 format %{ "movq $dst,[$constantaddress]\n\t" 3266 "punpcklqdq $dst,$dst\n\t" 3267 "vinserti128_high $dst,$dst\t" 3268 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %} 3269 ins_encode %{ 3270 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3271 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3272 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3273 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3274 %} 3275 ins_pipe( pipe_slow ); 3276 %} 3277 3278 instruct Repl4S(vecD dst, rRegI src) %{ 3279 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3280 match(Set dst (ReplicateS src)); 3281 format %{ "movd $dst,$src\n\t" 3282 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3283 ins_encode %{ 3284 __ movdl($dst$$XMMRegister, $src$$Register); 3285 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3286 %} 3287 ins_pipe( pipe_slow ); 3288 %} 3289 3290 instruct Repl4S_mem(vecD dst, memory mem) %{ 3291 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3292 match(Set dst (ReplicateS (LoadS mem))); 3293 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3294 ins_encode %{ 3295 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3296 %} 3297 ins_pipe( pipe_slow ); 3298 %} 3299 3300 instruct Repl8S(vecX dst, rRegI src) %{ 3301 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3302 match(Set dst (ReplicateS src)); 3303 format %{ "movd $dst,$src\n\t" 3304 "pshuflw $dst,$dst,0x00\n\t" 3305 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3306 ins_encode %{ 3307 __ movdl($dst$$XMMRegister, $src$$Register); 3308 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3309 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3310 %} 3311 ins_pipe( pipe_slow ); 3312 %} 3313 3314 instruct Repl8S_mem(vecX dst, memory mem) %{ 3315 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3316 match(Set dst (ReplicateS (LoadS mem))); 3317 format %{ "pshuflw $dst,$mem,0x00\n\t" 3318 "punpcklqdq $dst,$dst\t! replicate8S" %} 3319 ins_encode %{ 3320 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3321 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3322 %} 3323 ins_pipe( pipe_slow ); 3324 %} 3325 3326 instruct Repl8S_imm(vecX dst, immI con) %{ 3327 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3328 match(Set dst (ReplicateS con)); 3329 format %{ "movq $dst,[$constantaddress]\n\t" 3330 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3331 ins_encode %{ 3332 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3333 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3334 %} 3335 ins_pipe( pipe_slow ); 3336 %} 3337 3338 instruct Repl16S(vecY dst, rRegI src) %{ 3339 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3340 match(Set dst (ReplicateS src)); 3341 format %{ "movd $dst,$src\n\t" 3342 "pshuflw $dst,$dst,0x00\n\t" 3343 "punpcklqdq $dst,$dst\n\t" 3344 "vinserti128_high $dst,$dst\t! replicate16S" %} 3345 ins_encode %{ 3346 __ movdl($dst$$XMMRegister, $src$$Register); 3347 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3348 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3349 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3350 %} 3351 ins_pipe( pipe_slow ); 3352 %} 3353 3354 instruct Repl16S_mem(vecY dst, memory mem) %{ 3355 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3356 match(Set dst (ReplicateS (LoadS mem))); 3357 format %{ "pshuflw $dst,$mem,0x00\n\t" 3358 "punpcklqdq $dst,$dst\n\t" 3359 "vinserti128_high $dst,$dst\t! replicate16S" %} 3360 ins_encode %{ 3361 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3362 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3363 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3364 %} 3365 ins_pipe( pipe_slow ); 3366 %} 3367 3368 instruct Repl16S_imm(vecY dst, immI con) %{ 3369 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3370 match(Set dst (ReplicateS con)); 3371 format %{ "movq $dst,[$constantaddress]\n\t" 3372 "punpcklqdq $dst,$dst\n\t" 3373 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3374 ins_encode %{ 3375 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3376 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3377 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3378 %} 3379 ins_pipe( pipe_slow ); 3380 %} 3381 3382 instruct Repl32S(legVecZ dst, rRegI src) %{ 3383 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3384 match(Set dst (ReplicateS src)); 3385 format %{ "movd $dst,$src\n\t" 3386 "pshuflw $dst,$dst,0x00\n\t" 3387 "punpcklqdq $dst,$dst\n\t" 3388 "vinserti128_high $dst,$dst\t" 3389 "vinserti64x4 $dst,$dst,$dst,0x1\t! 
replicate32S" %} 3390 ins_encode %{ 3391 __ movdl($dst$$XMMRegister, $src$$Register); 3392 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3393 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3394 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3395 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3396 %} 3397 ins_pipe( pipe_slow ); 3398 %} 3399 3400 instruct Repl32S_mem(legVecZ dst, memory mem) %{ 3401 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3402 match(Set dst (ReplicateS (LoadS mem))); 3403 format %{ "pshuflw $dst,$mem,0x00\n\t" 3404 "punpcklqdq $dst,$dst\n\t" 3405 "vinserti128_high $dst,$dst\t" 3406 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %} 3407 ins_encode %{ 3408 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3409 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3410 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3411 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3412 %} 3413 ins_pipe( pipe_slow ); 3414 %} 3415 3416 instruct Repl32S_imm(legVecZ dst, immI con) %{ 3417 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3418 match(Set dst (ReplicateS con)); 3419 format %{ "movq $dst,[$constantaddress]\n\t" 3420 "punpcklqdq $dst,$dst\n\t" 3421 "vinserti128_high $dst,$dst\t" 3422 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %} 3423 ins_encode %{ 3424 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3425 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3426 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3427 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3428 %} 3429 ins_pipe( pipe_slow ); 3430 %} 3431 3432 instruct Repl4I(vecX dst, rRegI src) %{ 3433 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3434 match(Set dst (ReplicateI src)); 3435 format %{ "movd $dst,$src\n\t" 3436 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3437 ins_encode %{ 3438 __ movdl($dst$$XMMRegister, $src$$Register); 3439 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3440 %} 3441 ins_pipe( pipe_slow ); 3442 %} 3443 3444 instruct Repl4I_mem(vecX dst, memory mem) %{ 3445 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3446 match(Set dst (ReplicateI (LoadI mem))); 3447 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3448 ins_encode %{ 3449 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3450 %} 3451 ins_pipe( pipe_slow ); 3452 %} 3453 3454 instruct Repl8I(vecY dst, rRegI src) %{ 3455 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3456 match(Set dst (ReplicateI src)); 3457 format %{ "movd $dst,$src\n\t" 3458 "pshufd $dst,$dst,0x00\n\t" 3459 "vinserti128_high $dst,$dst\t! replicate8I" %} 3460 ins_encode %{ 3461 __ movdl($dst$$XMMRegister, $src$$Register); 3462 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3463 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3464 %} 3465 ins_pipe( pipe_slow ); 3466 %} 3467 3468 instruct Repl8I_mem(vecY dst, memory mem) %{ 3469 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3470 match(Set dst (ReplicateI (LoadI mem))); 3471 format %{ "pshufd $dst,$mem,0x00\n\t" 3472 "vinserti128_high $dst,$dst\t! 
replicate8I" %} 3473 ins_encode %{ 3474 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3475 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3476 %} 3477 ins_pipe( pipe_slow ); 3478 %} 3479 3480 instruct Repl16I(legVecZ dst, rRegI src) %{ 3481 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3482 match(Set dst (ReplicateI src)); 3483 format %{ "movd $dst,$src\n\t" 3484 "pshufd $dst,$dst,0x00\n\t" 3485 "vinserti128_high $dst,$dst\t" 3486 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3487 ins_encode %{ 3488 __ movdl($dst$$XMMRegister, $src$$Register); 3489 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3490 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3491 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3492 %} 3493 ins_pipe( pipe_slow ); 3494 %} 3495 3496 instruct Repl16I_mem(legVecZ dst, memory mem) %{ 3497 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3498 match(Set dst (ReplicateI (LoadI mem))); 3499 format %{ "pshufd $dst,$mem,0x00\n\t" 3500 "vinserti128_high $dst,$dst\t" 3501 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %} 3502 ins_encode %{ 3503 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3504 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3505 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3506 %} 3507 ins_pipe( pipe_slow ); 3508 %} 3509 3510 instruct Repl4I_imm(vecX dst, immI con) %{ 3511 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3512 match(Set dst (ReplicateI con)); 3513 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3514 "punpcklqdq $dst,$dst" %} 3515 ins_encode %{ 3516 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3517 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3518 %} 3519 ins_pipe( pipe_slow ); 3520 %} 3521 3522 instruct Repl8I_imm(vecY dst, immI con) %{ 3523 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3524 match(Set dst (ReplicateI con)); 3525 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3526 "punpcklqdq $dst,$dst\n\t" 3527 "vinserti128_high $dst,$dst" %} 3528 ins_encode %{ 3529 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3530 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3531 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3532 %} 3533 ins_pipe( pipe_slow ); 3534 %} 3535 3536 instruct Repl16I_imm(legVecZ dst, immI con) %{ 3537 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3538 match(Set dst (ReplicateI con)); 3539 format %{ "movq $dst,[$constantaddress]\t" 3540 "punpcklqdq $dst,$dst\n\t" 3541 "vinserti128_high $dst,$dst" 3542 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %} 3543 ins_encode %{ 3544 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3545 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3546 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3547 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3548 %} 3549 ins_pipe( pipe_slow ); 3550 %} 3551 3552 // Long could be loaded into xmm register directly from memory. 3553 instruct Repl2L_mem(vecX dst, memory mem) %{ 3554 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3555 match(Set dst (ReplicateL (LoadL mem))); 3556 format %{ "movq $dst,$mem\n\t" 3557 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3558 ins_encode %{ 3559 __ movq($dst$$XMMRegister, $mem$$Address); 3560 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3561 %} 3562 ins_pipe( pipe_slow ); 3563 %} 3564 3565 // Replicate long (8 byte) scalar to be vector 3566 #ifdef _LP64 3567 instruct Repl4L(vecY dst, rRegL src) %{ 3568 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3569 match(Set dst (ReplicateL src)); 3570 format %{ "movdq $dst,$src\n\t" 3571 "punpcklqdq $dst,$dst\n\t" 3572 "vinserti128_high $dst,$dst\t! replicate4L" %} 3573 ins_encode %{ 3574 __ movdq($dst$$XMMRegister, $src$$Register); 3575 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3576 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3577 %} 3578 ins_pipe( pipe_slow ); 3579 %} 3580 3581 instruct Repl8L(legVecZ dst, rRegL src) %{ 3582 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3583 match(Set dst (ReplicateL src)); 3584 format %{ "movdq $dst,$src\n\t" 3585 "punpcklqdq $dst,$dst\n\t" 3586 "vinserti128_high $dst,$dst\t" 3587 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3588 ins_encode %{ 3589 __ movdq($dst$$XMMRegister, $src$$Register); 3590 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3591 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3592 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3593 %} 3594 ins_pipe( pipe_slow ); 3595 %} 3596 #else // _LP64 3597 instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{ 3598 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3599 match(Set dst (ReplicateL src)); 3600 effect(TEMP dst, USE src, TEMP tmp); 3601 format %{ "movdl $dst,$src.lo\n\t" 3602 "movdl $tmp,$src.hi\n\t" 3603 "punpckldq $dst,$tmp\n\t" 3604 "punpcklqdq $dst,$dst\n\t" 3605 "vinserti128_high $dst,$dst\t! replicate4L" %} 3606 ins_encode %{ 3607 __ movdl($dst$$XMMRegister, $src$$Register); 3608 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3609 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3610 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3611 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3612 %} 3613 ins_pipe( pipe_slow ); 3614 %} 3615 3616 instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{ 3617 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3618 match(Set dst (ReplicateL src)); 3619 effect(TEMP dst, USE src, TEMP tmp); 3620 format %{ "movdl $dst,$src.lo\n\t" 3621 "movdl $tmp,$src.hi\n\t" 3622 "punpckldq $dst,$tmp\n\t" 3623 "punpcklqdq $dst,$dst\n\t" 3624 "vinserti128_high $dst,$dst\t" 3625 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3626 ins_encode %{ 3627 __ movdl($dst$$XMMRegister, $src$$Register); 3628 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3629 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3630 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3631 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3632 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3633 %} 3634 ins_pipe( pipe_slow ); 3635 %} 3636 #endif // _LP64 3637 3638 instruct Repl4L_imm(vecY dst, immL con) %{ 3639 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3640 match(Set dst (ReplicateL con)); 3641 format %{ "movq $dst,[$constantaddress]\n\t" 3642 "punpcklqdq $dst,$dst\n\t" 3643 "vinserti128_high $dst,$dst\t! 
replicate4L($con)" %} 3644 ins_encode %{ 3645 __ movq($dst$$XMMRegister, $constantaddress($con)); 3646 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3647 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3648 %} 3649 ins_pipe( pipe_slow ); 3650 %} 3651 3652 instruct Repl8L_imm(legVecZ dst, immL con) %{ 3653 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3654 match(Set dst (ReplicateL con)); 3655 format %{ "movq $dst,[$constantaddress]\n\t" 3656 "punpcklqdq $dst,$dst\n\t" 3657 "vinserti128_high $dst,$dst\t" 3658 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %} 3659 ins_encode %{ 3660 __ movq($dst$$XMMRegister, $constantaddress($con)); 3661 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3662 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3663 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3664 %} 3665 ins_pipe( pipe_slow ); 3666 %} 3667 3668 instruct Repl4L_mem(vecY dst, memory mem) %{ 3669 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3670 match(Set dst (ReplicateL (LoadL mem))); 3671 format %{ "movq $dst,$mem\n\t" 3672 "punpcklqdq $dst,$dst\n\t" 3673 "vinserti128_high $dst,$dst\t! replicate4L" %} 3674 ins_encode %{ 3675 __ movq($dst$$XMMRegister, $mem$$Address); 3676 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3677 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3678 %} 3679 ins_pipe( pipe_slow ); 3680 %} 3681 3682 instruct Repl8L_mem(legVecZ dst, memory mem) %{ 3683 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3684 match(Set dst (ReplicateL (LoadL mem))); 3685 format %{ "movq $dst,$mem\n\t" 3686 "punpcklqdq $dst,$dst\n\t" 3687 "vinserti128_high $dst,$dst\t" 3688 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %} 3689 ins_encode %{ 3690 __ movq($dst$$XMMRegister, $mem$$Address); 3691 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3692 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3693 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3694 %} 3695 ins_pipe( pipe_slow ); 3696 %} 3697 3698 instruct Repl2F_mem(vecD dst, memory mem) %{ 3699 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3700 match(Set dst (ReplicateF (LoadF mem))); 3701 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3702 ins_encode %{ 3703 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3704 %} 3705 ins_pipe( pipe_slow ); 3706 %} 3707 3708 instruct Repl4F_mem(vecX dst, memory mem) %{ 3709 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3710 match(Set dst (ReplicateF (LoadF mem))); 3711 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 3712 ins_encode %{ 3713 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3714 %} 3715 ins_pipe( pipe_slow ); 3716 %} 3717 3718 instruct Repl8F(vecY dst, vlRegF src) %{ 3719 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3720 match(Set dst (ReplicateF src)); 3721 format %{ "pshufd $dst,$src,0x00\n\t" 3722 "vinsertf128_high $dst,$dst\t! 
replicate8F" %} 3723 ins_encode %{ 3724 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3725 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3726 %} 3727 ins_pipe( pipe_slow ); 3728 %} 3729 3730 instruct Repl8F_mem(vecY dst, memory mem) %{ 3731 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3732 match(Set dst (ReplicateF (LoadF mem))); 3733 format %{ "pshufd $dst,$mem,0x00\n\t" 3734 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3735 ins_encode %{ 3736 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3737 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3738 %} 3739 ins_pipe( pipe_slow ); 3740 %} 3741 3742 instruct Repl16F(legVecZ dst, vlRegF src) %{ 3743 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3744 match(Set dst (ReplicateF src)); 3745 format %{ "pshufd $dst,$src,0x00\n\t" 3746 "vinsertf128_high $dst,$dst\t" 3747 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3748 ins_encode %{ 3749 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3750 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3751 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3752 %} 3753 ins_pipe( pipe_slow ); 3754 %} 3755 3756 instruct Repl16F_mem(legVecZ dst, memory mem) %{ 3757 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl()); 3758 match(Set dst (ReplicateF (LoadF mem))); 3759 format %{ "pshufd $dst,$mem,0x00\n\t" 3760 "vinsertf128_high $dst,$dst\t" 3761 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %} 3762 ins_encode %{ 3763 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3764 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3765 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3766 %} 3767 ins_pipe( pipe_slow ); 3768 %} 3769 3770 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3771 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3772 match(Set dst (ReplicateF zero)); 3773 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3774 ins_encode %{ 3775 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3776 %} 3777 ins_pipe( fpu_reg_reg ); 3778 %} 3779 3780 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3781 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3782 match(Set dst (ReplicateF zero)); 3783 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3784 ins_encode %{ 3785 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3786 %} 3787 ins_pipe( fpu_reg_reg ); 3788 %} 3789 3790 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3791 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3792 match(Set dst (ReplicateF zero)); 3793 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3794 ins_encode %{ 3795 int vector_len = 1; 3796 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3797 %} 3798 ins_pipe( fpu_reg_reg ); 3799 %} 3800 3801 instruct Repl2D_mem(vecX dst, memory mem) %{ 3802 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3803 match(Set dst (ReplicateD (LoadD mem))); 3804 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3805 ins_encode %{ 3806 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3807 %} 3808 ins_pipe( pipe_slow ); 3809 %} 3810 3811 instruct Repl4D(vecY dst, vlRegD src) %{ 3812 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3813 match(Set dst (ReplicateD src)); 3814 format %{ "pshufd $dst,$src,0x44\n\t" 3815 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3816 ins_encode %{ 3817 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3818 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3819 %} 3820 ins_pipe( pipe_slow ); 3821 %} 3822 3823 instruct Repl4D_mem(vecY dst, memory mem) %{ 3824 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3825 match(Set dst (ReplicateD (LoadD mem))); 3826 format %{ "pshufd $dst,$mem,0x44\n\t" 3827 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3828 ins_encode %{ 3829 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3830 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3831 %} 3832 ins_pipe( pipe_slow ); 3833 %} 3834 3835 instruct Repl8D(legVecZ dst, vlRegD src) %{ 3836 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3837 match(Set dst (ReplicateD src)); 3838 format %{ "pshufd $dst,$src,0x44\n\t" 3839 "vinsertf128_high $dst,$dst\t" 3840 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3841 ins_encode %{ 3842 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3843 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3844 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3845 %} 3846 ins_pipe( pipe_slow ); 3847 %} 3848 3849 instruct Repl8D_mem(legVecZ dst, memory mem) %{ 3850 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3851 match(Set dst (ReplicateD (LoadD mem))); 3852 format %{ "pshufd $dst,$mem,0x44\n\t" 3853 "vinsertf128_high $dst,$dst\t" 3854 "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %} 3855 ins_encode %{ 3856 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3857 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3858 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 3859 %} 3860 ins_pipe( pipe_slow ); 3861 %} 3862 3863 // Replicate double (8 byte) scalar zero to be vector 3864 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3865 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3866 match(Set dst (ReplicateD zero)); 3867 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3868 ins_encode %{ 3869 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3870 %} 3871 ins_pipe( fpu_reg_reg ); 3872 %} 3873 3874 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3875 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3876 match(Set dst (ReplicateD zero)); 3877 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3878 ins_encode %{ 3879 int vector_len = 1; 3880 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3881 %} 3882 ins_pipe( fpu_reg_reg ); 3883 %} 3884 3885 // ====================GENERIC REPLICATE========================================== 3886 3887 // Replicate byte scalar to be vector 3888 instruct Repl4B(vecS dst, rRegI src) %{ 3889 predicate(n->as_Vector()->length() == 4); 3890 match(Set dst (ReplicateB src)); 3891 format %{ "movd $dst,$src\n\t" 3892 "punpcklbw $dst,$dst\n\t" 3893 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3894 ins_encode %{ 3895 __ movdl($dst$$XMMRegister, $src$$Register); 3896 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3897 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 instruct Repl8B(vecD dst, rRegI src) %{ 3903 predicate(n->as_Vector()->length() == 8); 3904 match(Set dst (ReplicateB src)); 3905 format %{ "movd $dst,$src\n\t" 3906 "punpcklbw $dst,$dst\n\t" 3907 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3908 ins_encode %{ 3909 __ movdl($dst$$XMMRegister, $src$$Register); 3910 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3911 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3912 %} 3913 ins_pipe( pipe_slow ); 3914 %} 3915 3916 // Replicate byte scalar immediate to be vector by loading from const table. 3917 instruct Repl4B_imm(vecS dst, immI con) %{ 3918 predicate(n->as_Vector()->length() == 4); 3919 match(Set dst (ReplicateB con)); 3920 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3921 ins_encode %{ 3922 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3923 %} 3924 ins_pipe( pipe_slow ); 3925 %} 3926 3927 instruct Repl8B_imm(vecD dst, immI con) %{ 3928 predicate(n->as_Vector()->length() == 8); 3929 match(Set dst (ReplicateB con)); 3930 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3931 ins_encode %{ 3932 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3933 %} 3934 ins_pipe( pipe_slow ); 3935 %} 3936 3937 // Replicate byte scalar zero to be vector 3938 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3939 predicate(n->as_Vector()->length() == 4); 3940 match(Set dst (ReplicateB zero)); 3941 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3942 ins_encode %{ 3943 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3944 %} 3945 ins_pipe( fpu_reg_reg ); 3946 %} 3947 3948 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3949 predicate(n->as_Vector()->length() == 8); 3950 match(Set dst (ReplicateB zero)); 3951 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 3952 ins_encode %{ 3953 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3954 %} 3955 ins_pipe( fpu_reg_reg ); 3956 %} 3957 3958 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 3959 predicate(n->as_Vector()->length() == 16); 3960 match(Set dst (ReplicateB zero)); 3961 format %{ "pxor $dst,$dst\t! replicate16B zero" %} 3962 ins_encode %{ 3963 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3964 %} 3965 ins_pipe( fpu_reg_reg ); 3966 %} 3967 3968 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 3969 predicate(n->as_Vector()->length() == 32); 3970 match(Set dst (ReplicateB zero)); 3971 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 3972 ins_encode %{ 3973 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3974 int vector_len = 1; 3975 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3976 %} 3977 ins_pipe( fpu_reg_reg ); 3978 %} 3979 3980 // Replicate char/short (2 byte) scalar to be vector 3981 instruct Repl2S(vecS dst, rRegI src) %{ 3982 predicate(n->as_Vector()->length() == 2); 3983 match(Set dst (ReplicateS src)); 3984 format %{ "movd $dst,$src\n\t" 3985 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 3986 ins_encode %{ 3987 __ movdl($dst$$XMMRegister, $src$$Register); 3988 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3989 %} 3990 ins_pipe( fpu_reg_reg ); 3991 %} 3992 3993 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 3994 instruct Repl2S_imm(vecS dst, immI con) %{ 3995 predicate(n->as_Vector()->length() == 2); 3996 match(Set dst (ReplicateS con)); 3997 format %{ "movdl $dst,[$constantaddress]\t! 
replicate2S($con)" %} 3998 ins_encode %{ 3999 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 4000 %} 4001 ins_pipe( fpu_reg_reg ); 4002 %} 4003 4004 instruct Repl4S_imm(vecD dst, immI con) %{ 4005 predicate(n->as_Vector()->length() == 4); 4006 match(Set dst (ReplicateS con)); 4007 format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %} 4008 ins_encode %{ 4009 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4010 %} 4011 ins_pipe( fpu_reg_reg ); 4012 %} 4013 4014 // Replicate char/short (2 byte) scalar zero to be vector 4015 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 4016 predicate(n->as_Vector()->length() == 2); 4017 match(Set dst (ReplicateS zero)); 4018 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 4019 ins_encode %{ 4020 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4021 %} 4022 ins_pipe( fpu_reg_reg ); 4023 %} 4024 4025 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 4026 predicate(n->as_Vector()->length() == 4); 4027 match(Set dst (ReplicateS zero)); 4028 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 4029 ins_encode %{ 4030 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4031 %} 4032 ins_pipe( fpu_reg_reg ); 4033 %} 4034 4035 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 4036 predicate(n->as_Vector()->length() == 8); 4037 match(Set dst (ReplicateS zero)); 4038 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 4039 ins_encode %{ 4040 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4041 %} 4042 ins_pipe( fpu_reg_reg ); 4043 %} 4044 4045 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 4046 predicate(n->as_Vector()->length() == 16); 4047 match(Set dst (ReplicateS zero)); 4048 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 4049 ins_encode %{ 4050 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4051 int vector_len = 1; 4052 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4053 %} 4054 ins_pipe( fpu_reg_reg ); 4055 %} 4056 4057 // Replicate integer (4 byte) scalar to be vector 4058 instruct Repl2I(vecD dst, rRegI src) %{ 4059 predicate(n->as_Vector()->length() == 2); 4060 match(Set dst (ReplicateI src)); 4061 format %{ "movd $dst,$src\n\t" 4062 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4063 ins_encode %{ 4064 __ movdl($dst$$XMMRegister, $src$$Register); 4065 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4066 %} 4067 ins_pipe( fpu_reg_reg ); 4068 %} 4069 4070 // Integer could be loaded into xmm register directly from memory. 4071 instruct Repl2I_mem(vecD dst, memory mem) %{ 4072 predicate(n->as_Vector()->length() == 2); 4073 match(Set dst (ReplicateI (LoadI mem))); 4074 format %{ "movd $dst,$mem\n\t" 4075 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4076 ins_encode %{ 4077 __ movdl($dst$$XMMRegister, $mem$$Address); 4078 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4079 %} 4080 ins_pipe( fpu_reg_reg ); 4081 %} 4082 4083 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 4084 instruct Repl2I_imm(vecD dst, immI con) %{ 4085 predicate(n->as_Vector()->length() == 2); 4086 match(Set dst (ReplicateI con)); 4087 format %{ "movq $dst,[$constantaddress]\t! 
replicate2I($con)" %} 4088 ins_encode %{ 4089 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4090 %} 4091 ins_pipe( fpu_reg_reg ); 4092 %} 4093 4094 // Replicate integer (4 byte) scalar zero to be vector 4095 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 4096 predicate(n->as_Vector()->length() == 2); 4097 match(Set dst (ReplicateI zero)); 4098 format %{ "pxor $dst,$dst\t! replicate2I" %} 4099 ins_encode %{ 4100 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4101 %} 4102 ins_pipe( fpu_reg_reg ); 4103 %} 4104 4105 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 4106 predicate(n->as_Vector()->length() == 4); 4107 match(Set dst (ReplicateI zero)); 4108 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 4109 ins_encode %{ 4110 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4111 %} 4112 ins_pipe( fpu_reg_reg ); 4113 %} 4114 4115 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 4116 predicate(n->as_Vector()->length() == 8); 4117 match(Set dst (ReplicateI zero)); 4118 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 4119 ins_encode %{ 4120 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4121 int vector_len = 1; 4122 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4123 %} 4124 ins_pipe( fpu_reg_reg ); 4125 %} 4126 4127 // Replicate long (8 byte) scalar to be vector 4128 #ifdef _LP64 4129 instruct Repl2L(vecX dst, rRegL src) %{ 4130 predicate(n->as_Vector()->length() == 2); 4131 match(Set dst (ReplicateL src)); 4132 format %{ "movdq $dst,$src\n\t" 4133 "punpcklqdq $dst,$dst\t! replicate2L" %} 4134 ins_encode %{ 4135 __ movdq($dst$$XMMRegister, $src$$Register); 4136 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4137 %} 4138 ins_pipe( pipe_slow ); 4139 %} 4140 #else // _LP64 4141 instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{ 4142 predicate(n->as_Vector()->length() == 2); 4143 match(Set dst (ReplicateL src)); 4144 effect(TEMP dst, USE src, TEMP tmp); 4145 format %{ "movdl $dst,$src.lo\n\t" 4146 "movdl $tmp,$src.hi\n\t" 4147 "punpckldq $dst,$tmp\n\t" 4148 "punpcklqdq $dst,$dst\t! replicate2L"%} 4149 ins_encode %{ 4150 __ movdl($dst$$XMMRegister, $src$$Register); 4151 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4152 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4153 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4154 %} 4155 ins_pipe( pipe_slow ); 4156 %} 4157 #endif // _LP64 4158 4159 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4160 instruct Repl2L_imm(vecX dst, immL con) %{ 4161 predicate(n->as_Vector()->length() == 2); 4162 match(Set dst (ReplicateL con)); 4163 format %{ "movq $dst,[$constantaddress]\n\t" 4164 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 4165 ins_encode %{ 4166 __ movq($dst$$XMMRegister, $constantaddress($con)); 4167 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4168 %} 4169 ins_pipe( pipe_slow ); 4170 %} 4171 4172 // Replicate long (8 byte) scalar zero to be vector 4173 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 4174 predicate(n->as_Vector()->length() == 2); 4175 match(Set dst (ReplicateL zero)); 4176 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 4177 ins_encode %{ 4178 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4179 %} 4180 ins_pipe( fpu_reg_reg ); 4181 %} 4182 4183 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 4184 predicate(n->as_Vector()->length() == 4); 4185 match(Set dst (ReplicateL zero)); 4186 format %{ "vpxor $dst,$dst,$dst\t! 
replicate4L zero" %} 4187 ins_encode %{ 4188 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4189 int vector_len = 1; 4190 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4191 %} 4192 ins_pipe( fpu_reg_reg ); 4193 %} 4194 4195 // Replicate float (4 byte) scalar to be vector 4196 instruct Repl2F(vecD dst, vlRegF src) %{ 4197 predicate(n->as_Vector()->length() == 2); 4198 match(Set dst (ReplicateF src)); 4199 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 4200 ins_encode %{ 4201 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4202 %} 4203 ins_pipe( fpu_reg_reg ); 4204 %} 4205 4206 instruct Repl4F(vecX dst, vlRegF src) %{ 4207 predicate(n->as_Vector()->length() == 4); 4208 match(Set dst (ReplicateF src)); 4209 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 4210 ins_encode %{ 4211 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4212 %} 4213 ins_pipe( pipe_slow ); 4214 %} 4215 4216 // Replicate double (8 bytes) scalar to be vector 4217 instruct Repl2D(vecX dst, vlRegD src) %{ 4218 predicate(n->as_Vector()->length() == 2); 4219 match(Set dst (ReplicateD src)); 4220 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 4221 ins_encode %{ 4222 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4223 %} 4224 ins_pipe( pipe_slow ); 4225 %} 4226 4227 // ====================EVEX REPLICATE============================================= 4228 4229 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4230 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4231 match(Set dst (ReplicateB (LoadB mem))); 4232 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4233 ins_encode %{ 4234 int vector_len = 0; 4235 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4236 %} 4237 ins_pipe( pipe_slow ); 4238 %} 4239 4240 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4241 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4242 match(Set dst (ReplicateB (LoadB mem))); 4243 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 4244 ins_encode %{ 4245 int vector_len = 0; 4246 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4247 %} 4248 ins_pipe( pipe_slow ); 4249 %} 4250 4251 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4252 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4253 match(Set dst (ReplicateB src)); 4254 format %{ "evpbroadcastb $dst,$src\t! replicate16B" %} 4255 ins_encode %{ 4256 int vector_len = 0; 4257 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4258 %} 4259 ins_pipe( pipe_slow ); 4260 %} 4261 4262 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4263 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4264 match(Set dst (ReplicateB (LoadB mem))); 4265 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4266 ins_encode %{ 4267 int vector_len = 0; 4268 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4269 %} 4270 ins_pipe( pipe_slow ); 4271 %} 4272 4273 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4274 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4275 match(Set dst (ReplicateB src)); 4276 format %{ "evpbroadcastb $dst,$src\t! 
replicate32B" %} 4277 ins_encode %{ 4278 int vector_len = 1; 4279 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4280 %} 4281 ins_pipe( pipe_slow ); 4282 %} 4283 4284 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4285 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4286 match(Set dst (ReplicateB (LoadB mem))); 4287 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4288 ins_encode %{ 4289 int vector_len = 1; 4290 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4291 %} 4292 ins_pipe( pipe_slow ); 4293 %} 4294 4295 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4296 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4297 match(Set dst (ReplicateB src)); 4298 format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %} 4299 ins_encode %{ 4300 int vector_len = 2; 4301 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4302 %} 4303 ins_pipe( pipe_slow ); 4304 %} 4305 4306 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4307 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4308 match(Set dst (ReplicateB (LoadB mem))); 4309 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4310 ins_encode %{ 4311 int vector_len = 2; 4312 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4313 %} 4314 ins_pipe( pipe_slow ); 4315 %} 4316 4317 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4318 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4319 match(Set dst (ReplicateB con)); 4320 format %{ "movq $dst,[$constantaddress]\n\t" 4321 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4322 ins_encode %{ 4323 int vector_len = 0; 4324 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4325 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4326 %} 4327 ins_pipe( pipe_slow ); 4328 %} 4329 4330 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4331 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4332 match(Set dst (ReplicateB con)); 4333 format %{ "movq $dst,[$constantaddress]\n\t" 4334 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4335 ins_encode %{ 4336 int vector_len = 1; 4337 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4338 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4339 %} 4340 ins_pipe( pipe_slow ); 4341 %} 4342 4343 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4344 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4345 match(Set dst (ReplicateB con)); 4346 format %{ "movq $dst,[$constantaddress]\n\t" 4347 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4348 ins_encode %{ 4349 int vector_len = 2; 4350 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4351 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4352 %} 4353 ins_pipe( pipe_slow ); 4354 %} 4355 4356 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4357 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4358 match(Set dst (ReplicateB zero)); 4359 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4360 ins_encode %{ 4361 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 
4362 int vector_len = 2; 4363 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4364 %} 4365 ins_pipe( fpu_reg_reg ); 4366 %} 4367 4368 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4369 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4370 match(Set dst (ReplicateS src)); 4371 format %{ "evpbroadcastw $dst,$src\t! replicate4S" %} 4372 ins_encode %{ 4373 int vector_len = 0; 4374 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4375 %} 4376 ins_pipe( pipe_slow ); 4377 %} 4378 4379 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4380 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4381 match(Set dst (ReplicateS (LoadS mem))); 4382 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4383 ins_encode %{ 4384 int vector_len = 0; 4385 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4386 %} 4387 ins_pipe( pipe_slow ); 4388 %} 4389 4390 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4391 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4392 match(Set dst (ReplicateS src)); 4393 format %{ "evpbroadcastw $dst,$src\t! replicate8S" %} 4394 ins_encode %{ 4395 int vector_len = 0; 4396 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4397 %} 4398 ins_pipe( pipe_slow ); 4399 %} 4400 4401 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4402 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4403 match(Set dst (ReplicateS (LoadS mem))); 4404 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4405 ins_encode %{ 4406 int vector_len = 0; 4407 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4408 %} 4409 ins_pipe( pipe_slow ); 4410 %} 4411 4412 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4413 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4414 match(Set dst (ReplicateS src)); 4415 format %{ "evpbroadcastw $dst,$src\t! replicate16S" %} 4416 ins_encode %{ 4417 int vector_len = 1; 4418 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4419 %} 4420 ins_pipe( pipe_slow ); 4421 %} 4422 4423 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4424 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4425 match(Set dst (ReplicateS (LoadS mem))); 4426 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4427 ins_encode %{ 4428 int vector_len = 1; 4429 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4430 %} 4431 ins_pipe( pipe_slow ); 4432 %} 4433 4434 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4435 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4436 match(Set dst (ReplicateS src)); 4437 format %{ "evpbroadcastw $dst,$src\t! replicate32S" %} 4438 ins_encode %{ 4439 int vector_len = 2; 4440 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4441 %} 4442 ins_pipe( pipe_slow ); 4443 %} 4444 4445 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4446 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4447 match(Set dst (ReplicateS (LoadS mem))); 4448 format %{ "vpbroadcastw $dst,$mem\t! 
replicate32S" %} 4449 ins_encode %{ 4450 int vector_len = 2; 4451 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4452 %} 4453 ins_pipe( pipe_slow ); 4454 %} 4455 4456 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4457 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4458 match(Set dst (ReplicateS con)); 4459 format %{ "movq $dst,[$constantaddress]\n\t" 4460 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4461 ins_encode %{ 4462 int vector_len = 0; 4463 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4464 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4465 %} 4466 ins_pipe( pipe_slow ); 4467 %} 4468 4469 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4470 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4471 match(Set dst (ReplicateS con)); 4472 format %{ "movq $dst,[$constantaddress]\n\t" 4473 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4474 ins_encode %{ 4475 int vector_len = 1; 4476 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4477 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4478 %} 4479 ins_pipe( pipe_slow ); 4480 %} 4481 4482 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4483 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4484 match(Set dst (ReplicateS con)); 4485 format %{ "movq $dst,[$constantaddress]\n\t" 4486 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4487 ins_encode %{ 4488 int vector_len = 2; 4489 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4490 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4496 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4497 match(Set dst (ReplicateS zero)); 4498 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4499 ins_encode %{ 4500 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4501 int vector_len = 2; 4502 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4503 %} 4504 ins_pipe( fpu_reg_reg ); 4505 %} 4506 4507 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4508 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4509 match(Set dst (ReplicateI src)); 4510 format %{ "evpbroadcastd $dst,$src\t! replicate4I" %} 4511 ins_encode %{ 4512 int vector_len = 0; 4513 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4514 %} 4515 ins_pipe( pipe_slow ); 4516 %} 4517 4518 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4519 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4520 match(Set dst (ReplicateI (LoadI mem))); 4521 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4522 ins_encode %{ 4523 int vector_len = 0; 4524 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4525 %} 4526 ins_pipe( pipe_slow ); 4527 %} 4528 4529 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4530 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4531 match(Set dst (ReplicateI src)); 4532 format %{ "evpbroadcastd $dst,$src\t! 
replicate8I" %} 4533 ins_encode %{ 4534 int vector_len = 1; 4535 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4536 %} 4537 ins_pipe( pipe_slow ); 4538 %} 4539 4540 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4541 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4542 match(Set dst (ReplicateI (LoadI mem))); 4543 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4544 ins_encode %{ 4545 int vector_len = 1; 4546 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4547 %} 4548 ins_pipe( pipe_slow ); 4549 %} 4550 4551 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4552 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4553 match(Set dst (ReplicateI src)); 4554 format %{ "evpbroadcastd $dst,$src\t! replicate16I" %} 4555 ins_encode %{ 4556 int vector_len = 2; 4557 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4558 %} 4559 ins_pipe( pipe_slow ); 4560 %} 4561 4562 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4563 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4564 match(Set dst (ReplicateI (LoadI mem))); 4565 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4566 ins_encode %{ 4567 int vector_len = 2; 4568 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4569 %} 4570 ins_pipe( pipe_slow ); 4571 %} 4572 4573 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4574 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4575 match(Set dst (ReplicateI con)); 4576 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4577 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4578 ins_encode %{ 4579 int vector_len = 0; 4580 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4581 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4582 %} 4583 ins_pipe( pipe_slow ); 4584 %} 4585 4586 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4587 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4588 match(Set dst (ReplicateI con)); 4589 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4590 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4591 ins_encode %{ 4592 int vector_len = 1; 4593 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4594 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4595 %} 4596 ins_pipe( pipe_slow ); 4597 %} 4598 4599 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4600 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4601 match(Set dst (ReplicateI con)); 4602 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4603 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4604 ins_encode %{ 4605 int vector_len = 2; 4606 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4607 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4608 %} 4609 ins_pipe( pipe_slow ); 4610 %} 4611 4612 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4613 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4614 match(Set dst (ReplicateI zero)); 4615 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4616 ins_encode %{ 4617 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 
4618 int vector_len = 2; 4619 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4620 %} 4621 ins_pipe( fpu_reg_reg ); 4622 %} 4623 4624 // Replicate long (8 byte) scalar to be vector 4625 #ifdef _LP64 4626 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4627 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4628 match(Set dst (ReplicateL src)); 4629 format %{ "evpbroadcastq $dst,$src\t! replicate4L" %} 4630 ins_encode %{ 4631 int vector_len = 1; 4632 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4633 %} 4634 ins_pipe( pipe_slow ); 4635 %} 4636 4637 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4638 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4639 match(Set dst (ReplicateL src)); 4640 format %{ "evpbroadcastq $dst,$src\t! replicate8L" %} 4641 ins_encode %{ 4642 int vector_len = 2; 4643 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4644 %} 4645 ins_pipe( pipe_slow ); 4646 %} 4647 #else // _LP64 4648 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4649 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4650 match(Set dst (ReplicateL src)); 4651 effect(TEMP dst, USE src, TEMP tmp); 4652 format %{ "movdl $dst,$src.lo\n\t" 4653 "movdl $tmp,$src.hi\n\t" 4654 "punpckldq $dst,$tmp\n\t" 4655 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4656 ins_encode %{ 4657 int vector_len = 1; 4658 __ movdl($dst$$XMMRegister, $src$$Register); 4659 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4660 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4661 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4662 %} 4663 ins_pipe( pipe_slow ); 4664 %} 4665 4666 instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{ 4667 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4668 match(Set dst (ReplicateL src)); 4669 effect(TEMP dst, USE src, TEMP tmp); 4670 format %{ "movdl $dst,$src.lo\n\t" 4671 "movdl $tmp,$src.hi\n\t" 4672 "punpckldq $dst,$tmp\n\t" 4673 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4674 ins_encode %{ 4675 int vector_len = 2; 4676 __ movdl($dst$$XMMRegister, $src$$Register); 4677 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4678 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4679 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4680 %} 4681 ins_pipe( pipe_slow ); 4682 %} 4683 #endif // _LP64 4684 4685 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4686 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4687 match(Set dst (ReplicateL con)); 4688 format %{ "movq $dst,[$constantaddress]\n\t" 4689 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4690 ins_encode %{ 4691 int vector_len = 1; 4692 __ movq($dst$$XMMRegister, $constantaddress($con)); 4693 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4694 %} 4695 ins_pipe( pipe_slow ); 4696 %} 4697 4698 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4699 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4700 match(Set dst (ReplicateL con)); 4701 format %{ "movq $dst,[$constantaddress]\n\t" 4702 "vpbroadcastq $dst,$dst\t! 
replicate8L" %} 4703 ins_encode %{ 4704 int vector_len = 2; 4705 __ movq($dst$$XMMRegister, $constantaddress($con)); 4706 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4707 %} 4708 ins_pipe( pipe_slow ); 4709 %} 4710 4711 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4712 predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4713 match(Set dst (ReplicateL (LoadL mem))); 4714 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4715 ins_encode %{ 4716 int vector_len = 0; 4717 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4718 %} 4719 ins_pipe( pipe_slow ); 4720 %} 4721 4722 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4723 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4724 match(Set dst (ReplicateL (LoadL mem))); 4725 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4726 ins_encode %{ 4727 int vector_len = 1; 4728 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4729 %} 4730 ins_pipe( pipe_slow ); 4731 %} 4732 4733 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4734 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4735 match(Set dst (ReplicateL (LoadL mem))); 4736 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4737 ins_encode %{ 4738 int vector_len = 2; 4739 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4740 %} 4741 ins_pipe( pipe_slow ); 4742 %} 4743 4744 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4745 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4746 match(Set dst (ReplicateL zero)); 4747 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4748 ins_encode %{ 4749 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4750 int vector_len = 2; 4751 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4752 %} 4753 ins_pipe( fpu_reg_reg ); 4754 %} 4755 4756 instruct Repl8F_evex(vecY dst, regF src) %{ 4757 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4758 match(Set dst (ReplicateF src)); 4759 format %{ "vpbroadcastss $dst,$src\t! replicate8F" %} 4760 ins_encode %{ 4761 int vector_len = 1; 4762 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4763 %} 4764 ins_pipe( pipe_slow ); 4765 %} 4766 4767 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4768 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4769 match(Set dst (ReplicateF (LoadF mem))); 4770 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4771 ins_encode %{ 4772 int vector_len = 1; 4773 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4774 %} 4775 ins_pipe( pipe_slow ); 4776 %} 4777 4778 instruct Repl16F_evex(vecZ dst, regF src) %{ 4779 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4780 match(Set dst (ReplicateF src)); 4781 format %{ "vpbroadcastss $dst,$src\t! replicate16F" %} 4782 ins_encode %{ 4783 int vector_len = 2; 4784 __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4785 %} 4786 ins_pipe( pipe_slow ); 4787 %} 4788 4789 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4790 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4791 match(Set dst (ReplicateF (LoadF mem))); 4792 format %{ "vbroadcastss $dst,$mem\t! 
replicate16F" %} 4793 ins_encode %{ 4794 int vector_len = 2; 4795 __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4796 %} 4797 ins_pipe( pipe_slow ); 4798 %} 4799 4800 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4801 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4802 match(Set dst (ReplicateF zero)); 4803 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4804 ins_encode %{ 4805 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4806 int vector_len = 2; 4807 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4808 %} 4809 ins_pipe( fpu_reg_reg ); 4810 %} 4811 4812 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4813 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4814 match(Set dst (ReplicateF zero)); 4815 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4816 ins_encode %{ 4817 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4818 int vector_len = 2; 4819 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4820 %} 4821 ins_pipe( fpu_reg_reg ); 4822 %} 4823 4824 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4825 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4826 match(Set dst (ReplicateF zero)); 4827 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4828 ins_encode %{ 4829 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4830 int vector_len = 2; 4831 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4832 %} 4833 ins_pipe( fpu_reg_reg ); 4834 %} 4835 4836 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4837 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4838 match(Set dst (ReplicateF zero)); 4839 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4840 ins_encode %{ 4841 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4842 int vector_len = 2; 4843 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4844 %} 4845 ins_pipe( fpu_reg_reg ); 4846 %} 4847 4848 instruct Repl4D_evex(vecY dst, regD src) %{ 4849 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4850 match(Set dst (ReplicateD src)); 4851 format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %} 4852 ins_encode %{ 4853 int vector_len = 1; 4854 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4855 %} 4856 ins_pipe( pipe_slow ); 4857 %} 4858 4859 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4860 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4861 match(Set dst (ReplicateD (LoadD mem))); 4862 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4863 ins_encode %{ 4864 int vector_len = 1; 4865 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4866 %} 4867 ins_pipe( pipe_slow ); 4868 %} 4869 4870 instruct Repl8D_evex(vecZ dst, regD src) %{ 4871 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4872 match(Set dst (ReplicateD src)); 4873 format %{ "vpbroadcastsd $dst,$src\t! 
replicate8D" %} 4874 ins_encode %{ 4875 int vector_len = 2; 4876 __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4877 %} 4878 ins_pipe( pipe_slow ); 4879 %} 4880 4881 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4882 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4883 match(Set dst (ReplicateD (LoadD mem))); 4884 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4885 ins_encode %{ 4886 int vector_len = 2; 4887 __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4888 %} 4889 ins_pipe( pipe_slow ); 4890 %} 4891 4892 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4893 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4894 match(Set dst (ReplicateD zero)); 4895 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4896 ins_encode %{ 4897 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4898 int vector_len = 2; 4899 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4900 %} 4901 ins_pipe( fpu_reg_reg ); 4902 %} 4903 4904 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4905 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4906 match(Set dst (ReplicateD zero)); 4907 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4908 ins_encode %{ 4909 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4910 int vector_len = 2; 4911 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4912 %} 4913 ins_pipe( fpu_reg_reg ); 4914 %} 4915 4916 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4917 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4918 match(Set dst (ReplicateD zero)); 4919 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4920 ins_encode %{ 4921 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4922 int vector_len = 2; 4923 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4924 %} 4925 ins_pipe( fpu_reg_reg ); 4926 %} 4927 4928 // ====================REDUCTION ARITHMETIC======================================= 4929 4930 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4931 predicate(UseSSE > 2 && UseAVX == 0); 4932 match(Set dst (AddReductionVI src1 src2)); 4933 effect(TEMP tmp2, TEMP tmp); 4934 format %{ "movdqu $tmp2,$src2\n\t" 4935 "phaddd $tmp2,$tmp2\n\t" 4936 "movd $tmp,$src1\n\t" 4937 "paddd $tmp,$tmp2\n\t" 4938 "movd $dst,$tmp\t! add reduction2I" %} 4939 ins_encode %{ 4940 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4941 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4942 __ movdl($tmp$$XMMRegister, $src1$$Register); 4943 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4944 __ movdl($dst$$Register, $tmp$$XMMRegister); 4945 %} 4946 ins_pipe( pipe_slow ); 4947 %} 4948 4949 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4950 predicate(VM_Version::supports_avxonly()); 4951 match(Set dst (AddReductionVI src1 src2)); 4952 effect(TEMP tmp, TEMP tmp2); 4953 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4954 "movd $tmp2,$src1\n\t" 4955 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4956 "movd $dst,$tmp2\t! 
add reduction2I" %} 4957 ins_encode %{ 4958 int vector_len = 0; 4959 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4960 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4961 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4962 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4963 %} 4964 ins_pipe( pipe_slow ); 4965 %} 4966 4967 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4968 predicate(UseAVX > 2); 4969 match(Set dst (AddReductionVI src1 src2)); 4970 effect(TEMP tmp, TEMP tmp2); 4971 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4972 "vpaddd $tmp,$src2,$tmp2\n\t" 4973 "movd $tmp2,$src1\n\t" 4974 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4975 "movd $dst,$tmp2\t! add reduction2I" %} 4976 ins_encode %{ 4977 int vector_len = 0; 4978 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4979 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4980 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4981 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4982 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4983 %} 4984 ins_pipe( pipe_slow ); 4985 %} 4986 4987 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 4988 predicate(UseSSE > 2 && UseAVX == 0); 4989 match(Set dst (AddReductionVI src1 src2)); 4990 effect(TEMP tmp, TEMP tmp2); 4991 format %{ "movdqu $tmp,$src2\n\t" 4992 "phaddd $tmp,$tmp\n\t" 4993 "phaddd $tmp,$tmp\n\t" 4994 "movd $tmp2,$src1\n\t" 4995 "paddd $tmp2,$tmp\n\t" 4996 "movd $dst,$tmp2\t! add reduction4I" %} 4997 ins_encode %{ 4998 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4999 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5000 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5001 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5002 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 5003 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5004 %} 5005 ins_pipe( pipe_slow ); 5006 %} 5007 5008 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5009 predicate(VM_Version::supports_avxonly()); 5010 match(Set dst (AddReductionVI src1 src2)); 5011 effect(TEMP tmp, TEMP tmp2); 5012 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5013 "vphaddd $tmp,$tmp,$tmp\n\t" 5014 "movd $tmp2,$src1\n\t" 5015 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5016 "movd $dst,$tmp2\t! add reduction4I" %} 5017 ins_encode %{ 5018 int vector_len = 0; 5019 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5020 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5021 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5022 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 5023 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5024 %} 5025 ins_pipe( pipe_slow ); 5026 %} 5027 5028 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5029 predicate(UseAVX > 2); 5030 match(Set dst (AddReductionVI src1 src2)); 5031 effect(TEMP tmp, TEMP tmp2); 5032 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5033 "vpaddd $tmp,$src2,$tmp2\n\t" 5034 "pshufd $tmp2,$tmp,0x1\n\t" 5035 "vpaddd $tmp,$tmp,$tmp2\n\t" 5036 "movd $tmp2,$src1\n\t" 5037 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5038 "movd $dst,$tmp2\t! 
add reduction4I" %} 5039 ins_encode %{ 5040 int vector_len = 0; 5041 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5042 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5043 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5044 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5045 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5046 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5047 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5048 %} 5049 ins_pipe( pipe_slow ); 5050 %} 5051 5052 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5053 predicate(VM_Version::supports_avxonly()); 5054 match(Set dst (AddReductionVI src1 src2)); 5055 effect(TEMP tmp, TEMP tmp2); 5056 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5057 "vphaddd $tmp,$tmp,$tmp2\n\t" 5058 "vextracti128_high $tmp2,$tmp\n\t" 5059 "vpaddd $tmp,$tmp,$tmp2\n\t" 5060 "movd $tmp2,$src1\n\t" 5061 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5062 "movd $dst,$tmp2\t! add reduction8I" %} 5063 ins_encode %{ 5064 int vector_len = 1; 5065 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5066 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5067 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 5068 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5069 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5070 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5071 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5072 %} 5073 ins_pipe( pipe_slow ); 5074 %} 5075 5076 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5077 predicate(UseAVX > 2); 5078 match(Set dst (AddReductionVI src1 src2)); 5079 effect(TEMP tmp, TEMP tmp2); 5080 format %{ "vextracti128_high $tmp,$src2\n\t" 5081 "vpaddd $tmp,$tmp,$src2\n\t" 5082 "pshufd $tmp2,$tmp,0xE\n\t" 5083 "vpaddd $tmp,$tmp,$tmp2\n\t" 5084 "pshufd $tmp2,$tmp,0x1\n\t" 5085 "vpaddd $tmp,$tmp,$tmp2\n\t" 5086 "movd $tmp2,$src1\n\t" 5087 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5088 "movd $dst,$tmp2\t! 
add reduction8I" %} 5089 ins_encode %{ 5090 int vector_len = 0; 5091 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5092 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5093 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5094 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5095 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5096 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5097 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5098 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5099 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5100 %} 5101 ins_pipe( pipe_slow ); 5102 %} 5103 5104 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5105 predicate(UseAVX > 2); 5106 match(Set dst (AddReductionVI src1 src2)); 5107 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5108 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5109 "vpaddd $tmp3,$tmp3,$src2\n\t" 5110 "vextracti128_high $tmp,$tmp3\n\t" 5111 "vpaddd $tmp,$tmp,$tmp3\n\t" 5112 "pshufd $tmp2,$tmp,0xE\n\t" 5113 "vpaddd $tmp,$tmp,$tmp2\n\t" 5114 "pshufd $tmp2,$tmp,0x1\n\t" 5115 "vpaddd $tmp,$tmp,$tmp2\n\t" 5116 "movd $tmp2,$src1\n\t" 5117 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5118 "movd $dst,$tmp2\t! mul reduction16I" %} 5119 ins_encode %{ 5120 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5121 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5122 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5123 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5124 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5125 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5126 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5127 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5128 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5129 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5130 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5131 %} 5132 ins_pipe( pipe_slow ); 5133 %} 5134 5135 #ifdef _LP64 5136 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5137 predicate(UseAVX > 2); 5138 match(Set dst (AddReductionVL src1 src2)); 5139 effect(TEMP tmp, TEMP tmp2); 5140 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5141 "vpaddq $tmp,$src2,$tmp2\n\t" 5142 "movdq $tmp2,$src1\n\t" 5143 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5144 "movdq $dst,$tmp2\t! add reduction2L" %} 5145 ins_encode %{ 5146 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5147 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5148 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5149 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5150 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5151 %} 5152 ins_pipe( pipe_slow ); 5153 %} 5154 5155 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5156 predicate(UseAVX > 2); 5157 match(Set dst (AddReductionVL src1 src2)); 5158 effect(TEMP tmp, TEMP tmp2); 5159 format %{ "vextracti128_high $tmp,$src2\n\t" 5160 "vpaddq $tmp2,$tmp,$src2\n\t" 5161 "pshufd $tmp,$tmp2,0xE\n\t" 5162 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5163 "movdq $tmp,$src1\n\t" 5164 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5165 "movdq $dst,$tmp2\t! 
add reduction4L" %} 5166 ins_encode %{ 5167 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5168 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5169 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5170 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5171 __ movdq($tmp$$XMMRegister, $src1$$Register); 5172 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5173 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5174 %} 5175 ins_pipe( pipe_slow ); 5176 %} 5177 5178 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5179 predicate(UseAVX > 2); 5180 match(Set dst (AddReductionVL src1 src2)); 5181 effect(TEMP tmp, TEMP tmp2); 5182 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5183 "vpaddq $tmp2,$tmp2,$src2\n\t" 5184 "vextracti128_high $tmp,$tmp2\n\t" 5185 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5186 "pshufd $tmp,$tmp2,0xE\n\t" 5187 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5188 "movdq $tmp,$src1\n\t" 5189 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5190 "movdq $dst,$tmp2\t! add reduction8L" %} 5191 ins_encode %{ 5192 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5193 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5194 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5195 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5196 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5197 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5198 __ movdq($tmp$$XMMRegister, $src1$$Register); 5199 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5200 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5201 %} 5202 ins_pipe( pipe_slow ); 5203 %} 5204 #endif 5205 5206 instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5207 predicate(UseSSE >= 1 && UseAVX == 0); 5208 match(Set dst (AddReductionVF dst src2)); 5209 effect(TEMP dst, TEMP tmp); 5210 format %{ "addss $dst,$src2\n\t" 5211 "pshufd $tmp,$src2,0x01\n\t" 5212 "addss $dst,$tmp\t! add reduction2F" %} 5213 ins_encode %{ 5214 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5215 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5216 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5217 %} 5218 ins_pipe( pipe_slow ); 5219 %} 5220 5221 instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5222 predicate(UseAVX > 0); 5223 match(Set dst (AddReductionVF dst src2)); 5224 effect(TEMP dst, TEMP tmp); 5225 format %{ "vaddss $dst,$dst,$src2\n\t" 5226 "pshufd $tmp,$src2,0x01\n\t" 5227 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5228 ins_encode %{ 5229 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5230 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5231 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5232 %} 5233 ins_pipe( pipe_slow ); 5234 %} 5235 5236 instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5237 predicate(UseSSE >= 1 && UseAVX == 0); 5238 match(Set dst (AddReductionVF dst src2)); 5239 effect(TEMP dst, TEMP tmp); 5240 format %{ "addss $dst,$src2\n\t" 5241 "pshufd $tmp,$src2,0x01\n\t" 5242 "addss $dst,$tmp\n\t" 5243 "pshufd $tmp,$src2,0x02\n\t" 5244 "addss $dst,$tmp\n\t" 5245 "pshufd $tmp,$src2,0x03\n\t" 5246 "addss $dst,$tmp\t! 
add reduction4F" %} 5247 ins_encode %{ 5248 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5249 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5250 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5251 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5252 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5253 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5254 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5255 %} 5256 ins_pipe( pipe_slow ); 5257 %} 5258 5259 instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5260 predicate(UseAVX > 0); 5261 match(Set dst (AddReductionVF dst src2)); 5262 effect(TEMP tmp, TEMP dst); 5263 format %{ "vaddss $dst,dst,$src2\n\t" 5264 "pshufd $tmp,$src2,0x01\n\t" 5265 "vaddss $dst,$dst,$tmp\n\t" 5266 "pshufd $tmp,$src2,0x02\n\t" 5267 "vaddss $dst,$dst,$tmp\n\t" 5268 "pshufd $tmp,$src2,0x03\n\t" 5269 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5270 ins_encode %{ 5271 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5272 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5273 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5274 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5275 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5276 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5277 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5278 %} 5279 ins_pipe( pipe_slow ); 5280 %} 5281 5282 instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{ 5283 predicate(UseAVX > 0); 5284 match(Set dst (AddReductionVF dst src2)); 5285 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5286 format %{ "vaddss $dst,$dst,$src2\n\t" 5287 "pshufd $tmp,$src2,0x01\n\t" 5288 "vaddss $dst,$dst,$tmp\n\t" 5289 "pshufd $tmp,$src2,0x02\n\t" 5290 "vaddss $dst,$dst,$tmp\n\t" 5291 "pshufd $tmp,$src2,0x03\n\t" 5292 "vaddss $dst,$dst,$tmp\n\t" 5293 "vextractf128_high $tmp2,$src2\n\t" 5294 "vaddss $dst,$dst,$tmp2\n\t" 5295 "pshufd $tmp,$tmp2,0x01\n\t" 5296 "vaddss $dst,$dst,$tmp\n\t" 5297 "pshufd $tmp,$tmp2,0x02\n\t" 5298 "vaddss $dst,$dst,$tmp\n\t" 5299 "pshufd $tmp,$tmp2,0x03\n\t" 5300 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5301 ins_encode %{ 5302 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5303 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5304 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5305 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5306 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5307 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5308 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5309 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5310 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5311 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5312 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5313 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5314 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5315 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5316 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5317 %} 5318 ins_pipe( pipe_slow ); 5319 %} 5320 5321 instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5322 predicate(UseAVX > 2); 5323 match(Set dst (AddReductionVF dst src2)); 5324 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5325 format %{ "vaddss $dst,$dst,$src2\n\t" 5326 "pshufd $tmp,$src2,0x01\n\t" 5327 "vaddss $dst,$dst,$tmp\n\t" 5328 "pshufd $tmp,$src2,0x02\n\t" 5329 "vaddss $dst,$dst,$tmp\n\t" 5330 "pshufd $tmp,$src2,0x03\n\t" 5331 "vaddss $dst,$dst,$tmp\n\t" 5332 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5333 "vaddss $dst,$dst,$tmp2\n\t" 5334 "pshufd $tmp,$tmp2,0x01\n\t" 5335 "vaddss $dst,$dst,$tmp\n\t" 5336 "pshufd $tmp,$tmp2,0x02\n\t" 5337 "vaddss $dst,$dst,$tmp\n\t" 5338 "pshufd $tmp,$tmp2,0x03\n\t" 5339 "vaddss $dst,$dst,$tmp\n\t" 5340 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5341 "vaddss $dst,$dst,$tmp2\n\t" 5342 "pshufd $tmp,$tmp2,0x01\n\t" 5343 "vaddss $dst,$dst,$tmp\n\t" 5344 "pshufd $tmp,$tmp2,0x02\n\t" 5345 "vaddss $dst,$dst,$tmp\n\t" 5346 "pshufd $tmp,$tmp2,0x03\n\t" 5347 "vaddss $dst,$dst,$tmp\n\t" 5348 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5349 "vaddss $dst,$dst,$tmp2\n\t" 5350 "pshufd $tmp,$tmp2,0x01\n\t" 5351 "vaddss $dst,$dst,$tmp\n\t" 5352 "pshufd $tmp,$tmp2,0x02\n\t" 5353 "vaddss $dst,$dst,$tmp\n\t" 5354 "pshufd $tmp,$tmp2,0x03\n\t" 5355 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5356 ins_encode %{ 5357 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5358 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5359 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5360 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5361 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5362 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5363 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5364 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5365 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5366 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5367 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5368 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5369 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5370 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5371 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5372 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5373 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5374 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5375 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5376 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5377 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5378 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5379 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5380 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5381 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5382 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5383 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5384 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5385 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5386 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5387 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5388 %} 5389 ins_pipe( pipe_slow ); 5390 %} 5391 5392 instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5393 predicate(UseSSE >= 1 && UseAVX == 0); 5394 match(Set dst (AddReductionVD dst src2)); 5395 effect(TEMP tmp, TEMP dst); 5396 format %{ "addsd $dst,$src2\n\t" 5397 "pshufd $tmp,$src2,0xE\n\t" 5398 "addsd $dst,$tmp\t! add reduction2D" %} 5399 ins_encode %{ 5400 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5401 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5402 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5403 %} 5404 ins_pipe( pipe_slow ); 5405 %} 5406 5407 instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5408 predicate(UseAVX > 0); 5409 match(Set dst (AddReductionVD dst src2)); 5410 effect(TEMP tmp, TEMP dst); 5411 format %{ "vaddsd $dst,$dst,$src2\n\t" 5412 "pshufd $tmp,$src2,0xE\n\t" 5413 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5414 ins_encode %{ 5415 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5416 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5417 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5418 %} 5419 ins_pipe( pipe_slow ); 5420 %} 5421 5422 instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{ 5423 predicate(UseAVX > 0); 5424 match(Set dst (AddReductionVD dst src2)); 5425 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5426 format %{ "vaddsd $dst,$dst,$src2\n\t" 5427 "pshufd $tmp,$src2,0xE\n\t" 5428 "vaddsd $dst,$dst,$tmp\n\t" 5429 "vextractf128 $tmp2,$src2,0x1\n\t" 5430 "vaddsd $dst,$dst,$tmp2\n\t" 5431 "pshufd $tmp,$tmp2,0xE\n\t" 5432 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5433 ins_encode %{ 5434 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5435 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5436 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5437 __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5438 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5439 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5440 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5441 %} 5442 ins_pipe( pipe_slow ); 5443 %} 5444 5445 instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5446 predicate(UseAVX > 2); 5447 match(Set dst (AddReductionVD dst src2)); 5448 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5449 format %{ "vaddsd $dst,$dst,$src2\n\t" 5450 "pshufd $tmp,$src2,0xE\n\t" 5451 "vaddsd $dst,$dst,$tmp\n\t" 5452 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5453 "vaddsd $dst,$dst,$tmp2\n\t" 5454 "pshufd $tmp,$tmp2,0xE\n\t" 5455 "vaddsd $dst,$dst,$tmp\n\t" 5456 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5457 "vaddsd $dst,$dst,$tmp2\n\t" 5458 "pshufd $tmp,$tmp2,0xE\n\t" 5459 "vaddsd $dst,$dst,$tmp\n\t" 5460 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5461 "vaddsd $dst,$dst,$tmp2\n\t" 5462 "pshufd $tmp,$tmp2,0xE\n\t" 5463 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5464 ins_encode %{ 5465 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5466 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5467 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5468 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5469 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5470 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5471 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5472 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5473 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5474 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5475 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5476 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5477 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5478 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5479 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5480 %} 5481 ins_pipe( pipe_slow ); 5482 %} 5483 5484 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5485 predicate(UseSSE > 3 && UseAVX == 0); 5486 match(Set dst (MulReductionVI src1 src2)); 5487 effect(TEMP tmp, TEMP tmp2); 5488 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5489 "pmulld $tmp2,$src2\n\t" 5490 "movd $tmp,$src1\n\t" 5491 "pmulld $tmp2,$tmp\n\t" 5492 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5493 ins_encode %{ 5494 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5495 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5496 __ movdl($tmp$$XMMRegister, $src1$$Register); 5497 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5498 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5499 %} 5500 ins_pipe( pipe_slow ); 5501 %} 5502 5503 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5504 predicate(UseAVX > 0); 5505 match(Set dst (MulReductionVI src1 src2)); 5506 effect(TEMP tmp, TEMP tmp2); 5507 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5508 "vpmulld $tmp,$src2,$tmp2\n\t" 5509 "movd $tmp2,$src1\n\t" 5510 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5511 "movd $dst,$tmp2\t! mul reduction2I" %} 5512 ins_encode %{ 5513 int vector_len = 0; 5514 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5515 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5516 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5517 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5518 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5519 %} 5520 ins_pipe( pipe_slow ); 5521 %} 5522 5523 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5524 predicate(UseSSE > 3 && UseAVX == 0); 5525 match(Set dst (MulReductionVI src1 src2)); 5526 effect(TEMP tmp, TEMP tmp2); 5527 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5528 "pmulld $tmp2,$src2\n\t" 5529 "pshufd $tmp,$tmp2,0x1\n\t" 5530 "pmulld $tmp2,$tmp\n\t" 5531 "movd $tmp,$src1\n\t" 5532 "pmulld $tmp2,$tmp\n\t" 5533 "movd $dst,$tmp2\t! mul reduction4I" %} 5534 ins_encode %{ 5535 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5536 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5537 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5538 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5539 __ movdl($tmp$$XMMRegister, $src1$$Register); 5540 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5541 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5542 %} 5543 ins_pipe( pipe_slow ); 5544 %} 5545 5546 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5547 predicate(UseAVX > 0); 5548 match(Set dst (MulReductionVI src1 src2)); 5549 effect(TEMP tmp, TEMP tmp2); 5550 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5551 "vpmulld $tmp,$src2,$tmp2\n\t" 5552 "pshufd $tmp2,$tmp,0x1\n\t" 5553 "vpmulld $tmp,$tmp,$tmp2\n\t" 5554 "movd $tmp2,$src1\n\t" 5555 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5556 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5557 ins_encode %{ 5558 int vector_len = 0; 5559 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5560 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5561 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5562 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5563 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5564 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5565 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5566 %} 5567 ins_pipe( pipe_slow ); 5568 %} 5569 5570 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 5571 predicate(UseAVX > 1); 5572 match(Set dst (MulReductionVI src1 src2)); 5573 effect(TEMP tmp, TEMP tmp2); 5574 format %{ "vextracti128_high $tmp,$src2\n\t" 5575 "vpmulld $tmp,$tmp,$src2\n\t" 5576 "pshufd $tmp2,$tmp,0xE\n\t" 5577 "vpmulld $tmp,$tmp,$tmp2\n\t" 5578 "pshufd $tmp2,$tmp,0x1\n\t" 5579 "vpmulld $tmp,$tmp,$tmp2\n\t" 5580 "movd $tmp2,$src1\n\t" 5581 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5582 "movd $dst,$tmp2\t! mul reduction8I" %} 5583 ins_encode %{ 5584 int vector_len = 0; 5585 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5586 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5587 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5588 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5589 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5590 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5591 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5592 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5593 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5594 %} 5595 ins_pipe( pipe_slow ); 5596 %} 5597 5598 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{ 5599 predicate(UseAVX > 2); 5600 match(Set dst (MulReductionVI src1 src2)); 5601 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5602 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5603 "vpmulld $tmp3,$tmp3,$src2\n\t" 5604 "vextracti128_high $tmp,$tmp3\n\t" 5605 "vpmulld $tmp,$tmp,$src2\n\t" 5606 "pshufd $tmp2,$tmp,0xE\n\t" 5607 "vpmulld $tmp,$tmp,$tmp2\n\t" 5608 "pshufd $tmp2,$tmp,0x1\n\t" 5609 "vpmulld $tmp,$tmp,$tmp2\n\t" 5610 "movd $tmp2,$src1\n\t" 5611 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5612 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5613 ins_encode %{ 5614 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5615 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5616 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5617 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5618 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5619 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5620 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5621 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5622 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5623 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5624 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5625 %} 5626 ins_pipe( pipe_slow ); 5627 %} 5628 5629 #ifdef _LP64 5630 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5631 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5632 match(Set dst (MulReductionVL src1 src2)); 5633 effect(TEMP tmp, TEMP tmp2); 5634 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5635 "vpmullq $tmp,$src2,$tmp2\n\t" 5636 "movdq $tmp2,$src1\n\t" 5637 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5638 "movdq $dst,$tmp2\t! mul reduction2L" %} 5639 ins_encode %{ 5640 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5641 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5642 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5643 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5644 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5645 %} 5646 ins_pipe( pipe_slow ); 5647 %} 5648 5649 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5650 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5651 match(Set dst (MulReductionVL src1 src2)); 5652 effect(TEMP tmp, TEMP tmp2); 5653 format %{ "vextracti128_high $tmp,$src2\n\t" 5654 "vpmullq $tmp2,$tmp,$src2\n\t" 5655 "pshufd $tmp,$tmp2,0xE\n\t" 5656 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5657 "movdq $tmp,$src1\n\t" 5658 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5659 "movdq $dst,$tmp2\t! mul reduction4L" %} 5660 ins_encode %{ 5661 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5662 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5663 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5664 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5665 __ movdq($tmp$$XMMRegister, $src1$$Register); 5666 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5667 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5668 %} 5669 ins_pipe( pipe_slow ); 5670 %} 5671 5672 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5673 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5674 match(Set dst (MulReductionVL src1 src2)); 5675 effect(TEMP tmp, TEMP tmp2); 5676 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5677 "vpmullq $tmp2,$tmp2,$src2\n\t" 5678 "vextracti128_high $tmp,$tmp2\n\t" 5679 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5680 "pshufd $tmp,$tmp2,0xE\n\t" 5681 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5682 "movdq $tmp,$src1\n\t" 5683 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5684 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5685 ins_encode %{ 5686 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5687 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5688 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5689 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5690 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5691 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5692 __ movdq($tmp$$XMMRegister, $src1$$Register); 5693 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5694 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5695 %} 5696 ins_pipe( pipe_slow ); 5697 %} 5698 #endif 5699 5700 instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{ 5701 predicate(UseSSE >= 1 && UseAVX == 0); 5702 match(Set dst (MulReductionVF dst src2)); 5703 effect(TEMP dst, TEMP tmp); 5704 format %{ "mulss $dst,$src2\n\t" 5705 "pshufd $tmp,$src2,0x01\n\t" 5706 "mulss $dst,$tmp\t! mul reduction2F" %} 5707 ins_encode %{ 5708 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5709 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5710 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5711 %} 5712 ins_pipe( pipe_slow ); 5713 %} 5714 5715 instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5716 predicate(UseAVX > 0); 5717 match(Set dst (MulReductionVF dst src2)); 5718 effect(TEMP tmp, TEMP dst); 5719 format %{ "vmulss $dst,$dst,$src2\n\t" 5720 "pshufd $tmp,$src2,0x01\n\t" 5721 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5722 ins_encode %{ 5723 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5724 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5725 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5726 %} 5727 ins_pipe( pipe_slow ); 5728 %} 5729 5730 instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5731 predicate(UseSSE >= 1 && UseAVX == 0); 5732 match(Set dst (MulReductionVF dst src2)); 5733 effect(TEMP dst, TEMP tmp); 5734 format %{ "mulss $dst,$src2\n\t" 5735 "pshufd $tmp,$src2,0x01\n\t" 5736 "mulss $dst,$tmp\n\t" 5737 "pshufd $tmp,$src2,0x02\n\t" 5738 "mulss $dst,$tmp\n\t" 5739 "pshufd $tmp,$src2,0x03\n\t" 5740 "mulss $dst,$tmp\t! mul reduction4F" %} 5741 ins_encode %{ 5742 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5743 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5744 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5745 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5746 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5747 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5748 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5749 %} 5750 ins_pipe( pipe_slow ); 5751 %} 5752 5753 instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5754 predicate(UseAVX > 0); 5755 match(Set dst (MulReductionVF dst src2)); 5756 effect(TEMP tmp, TEMP dst); 5757 format %{ "vmulss $dst,$dst,$src2\n\t" 5758 "pshufd $tmp,$src2,0x01\n\t" 5759 "vmulss $dst,$dst,$tmp\n\t" 5760 "pshufd $tmp,$src2,0x02\n\t" 5761 "vmulss $dst,$dst,$tmp\n\t" 5762 "pshufd $tmp,$src2,0x03\n\t" 5763 "vmulss $dst,$dst,$tmp\t! 

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
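
// pshufd's immediate selects which source dword lands in each destination
// dword: bits [1:0] pick lane 0, bits [3:2] pick lane 1, and so on. The
// immediates used in these reductions therefore mean:
//   0x01, 0x02, 0x03 - rotate lane 1/2/3 of the 128-bit chunk into lane 0;
//   0xE (0b1110)     - move the upper 64 bits (lanes 2,3) into the low half,
//                      which is how the double and long cases step down.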

instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
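
// These reductions are generated by SuperWord for product loops. As a rough,
// illustrative (not normative) Java example, a loop such as
//
//   double r = 1.0;
//   for (int i = 0; i < a.length; i++) { r *= a[i]; }
//
// can be vectorized so that the packed lanes arrive as $src2 and the running
// scalar r as the incoming accumulator of a MulReductionVD node, subject to
// SuperWord's reduction analysis accepting the loop.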

instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
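
// (The second pshufd in the rvmul8D format above is shown operating on $tmp2
// to match the encoding, which shuffles the extracted 128-bit chunk, not the
// original $src2.) vextractf32x4's immediate selects the imm-th 128-bit lane
// (0x0-0x3) of a 512-bit register, while vextractf128_high and
// vextracti128_high extract the upper 128-bit half of a 256-bit register;
// both are used here to walk a wide vector down to one 128-bit working chunk.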
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
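
// Every packed operation in this section comes in (up to) three flavors:
//   - an SSE form that accumulates in place (legacy SSE encodings are
//     two-operand and destroy the first source, hence "dst op= src"),
//   - a non-destructive three-operand AVX register form, and
//   - an AVX form that folds the second operand's LoadVector into a memory
//     operand.
// The vector_len argument passed to the assembler selects the encoded vector
// width: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.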

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
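
// Note that the 512-bit byte and short forms (packed64B, packed32S) are
// additionally predicated on supports_avx512bw(): AVX-512 Foundation only
// provides 32- and 64-bit element operations at full width, and byte/word
// element support comes with the separate AVX512BW extension.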

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
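
// A straightforward elementwise loop is the typical source of these nodes.
// Roughly (illustrative Java, assuming SuperWord vectorizes the loop):
//
//   for (int i = 0; i < n; i++) { c[i] = a[i] + b[i]; }
//
// becomes a sequence of LoadVector/AddVI/StoreVector nodes whose width is
// chosen from the available AVX level and MaxVectorSize, matching one of the
// rules above.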

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
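
// The 256-bit floating point forms above only require UseAVX > 0, because
// AVX1 already provides 256-bit FP arithmetic; the 256-bit integer forms
// (packed 8I/16S/32B, etc.) require UseAVX > 1, since 256-bit integer
// instructions were only introduced with AVX2.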

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
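
// pmullw/vpmullw keep only the low 16 bits of each 32-bit product, which
// matches the truncating semantics of Java's short/char multiply: operands
// are promoted to int, multiplied, and the result narrowed back on the store.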
mul packed4S" %} 7333 ins_encode %{ 7334 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7335 %} 7336 ins_pipe( pipe_slow ); 7337 %} 7338 7339 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 7340 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7341 match(Set dst (MulVS src1 src2)); 7342 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7343 ins_encode %{ 7344 int vector_len = 0; 7345 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7346 %} 7347 ins_pipe( pipe_slow ); 7348 %} 7349 7350 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 7351 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7352 match(Set dst (MulVS src (LoadVector mem))); 7353 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7354 ins_encode %{ 7355 int vector_len = 0; 7356 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7357 %} 7358 ins_pipe( pipe_slow ); 7359 %} 7360 7361 instruct vmul8S(vecX dst, vecX src) %{ 7362 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7363 match(Set dst (MulVS dst src)); 7364 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7365 ins_encode %{ 7366 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7367 %} 7368 ins_pipe( pipe_slow ); 7369 %} 7370 7371 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 7372 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7373 match(Set dst (MulVS src1 src2)); 7374 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7375 ins_encode %{ 7376 int vector_len = 0; 7377 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7378 %} 7379 ins_pipe( pipe_slow ); 7380 %} 7381 7382 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 7383 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7384 match(Set dst (MulVS src (LoadVector mem))); 7385 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7386 ins_encode %{ 7387 int vector_len = 0; 7388 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7389 %} 7390 ins_pipe( pipe_slow ); 7391 %} 7392 7393 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 7394 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7395 match(Set dst (MulVS src1 src2)); 7396 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7397 ins_encode %{ 7398 int vector_len = 1; 7399 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7400 %} 7401 ins_pipe( pipe_slow ); 7402 %} 7403 7404 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 7405 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7406 match(Set dst (MulVS src (LoadVector mem))); 7407 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7408 ins_encode %{ 7409 int vector_len = 1; 7410 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7411 %} 7412 ins_pipe( pipe_slow ); 7413 %} 7414 7415 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7416 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7417 match(Set dst (MulVS src1 src2)); 7418 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed32S" %} 7419 ins_encode %{ 7420 int vector_len = 2; 7421 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7422 %} 7423 ins_pipe( pipe_slow ); 7424 %} 7425 7426 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7427 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7428 match(Set dst (MulVS src (LoadVector mem))); 7429 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7430 ins_encode %{ 7431 int vector_len = 2; 7432 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7433 %} 7434 ins_pipe( pipe_slow ); 7435 %} 7436 7437 // Integers vector mul (sse4_1) 7438 instruct vmul2I(vecD dst, vecD src) %{ 7439 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7440 match(Set dst (MulVI dst src)); 7441 format %{ "pmulld $dst,$src\t! mul packed2I" %} 7442 ins_encode %{ 7443 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7444 %} 7445 ins_pipe( pipe_slow ); 7446 %} 7447 7448 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 7449 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7450 match(Set dst (MulVI src1 src2)); 7451 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 7452 ins_encode %{ 7453 int vector_len = 0; 7454 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7455 %} 7456 ins_pipe( pipe_slow ); 7457 %} 7458 7459 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 7460 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7461 match(Set dst (MulVI src (LoadVector mem))); 7462 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 7463 ins_encode %{ 7464 int vector_len = 0; 7465 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7466 %} 7467 ins_pipe( pipe_slow ); 7468 %} 7469 7470 instruct vmul4I(vecX dst, vecX src) %{ 7471 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7472 match(Set dst (MulVI dst src)); 7473 format %{ "pmulld $dst,$src\t! mul packed4I" %} 7474 ins_encode %{ 7475 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7476 %} 7477 ins_pipe( pipe_slow ); 7478 %} 7479 7480 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 7481 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7482 match(Set dst (MulVI src1 src2)); 7483 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 7484 ins_encode %{ 7485 int vector_len = 0; 7486 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7487 %} 7488 ins_pipe( pipe_slow ); 7489 %} 7490 7491 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 7492 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7493 match(Set dst (MulVI src (LoadVector mem))); 7494 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 7495 ins_encode %{ 7496 int vector_len = 0; 7497 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7498 %} 7499 ins_pipe( pipe_slow ); 7500 %} 7501 7502 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 7503 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7504 match(Set dst (MulVL src1 src2)); 7505 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 7506 ins_encode %{ 7507 int vector_len = 0; 7508 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7509 %} 7510 ins_pipe( pipe_slow ); 7511 %} 7512 7513 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 7514 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7515 match(Set dst (MulVL src (LoadVector mem))); 7516 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 7517 ins_encode %{ 7518 int vector_len = 0; 7519 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7520 %} 7521 ins_pipe( pipe_slow ); 7522 %} 7523 7524 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 7525 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7526 match(Set dst (MulVL src1 src2)); 7527 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 7528 ins_encode %{ 7529 int vector_len = 1; 7530 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7531 %} 7532 ins_pipe( pipe_slow ); 7533 %} 7534 7535 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7536 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7537 match(Set dst (MulVL src (LoadVector mem))); 7538 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7539 ins_encode %{ 7540 int vector_len = 1; 7541 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7542 %} 7543 ins_pipe( pipe_slow ); 7544 %} 7545 7546 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7547 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7548 match(Set dst (MulVL src1 src2)); 7549 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 7550 ins_encode %{ 7551 int vector_len = 2; 7552 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7553 %} 7554 ins_pipe( pipe_slow ); 7555 %} 7556 7557 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7558 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7559 match(Set dst (MulVL src (LoadVector mem))); 7560 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7561 ins_encode %{ 7562 int vector_len = 2; 7563 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7564 %} 7565 ins_pipe( pipe_slow ); 7566 %} 7567 7568 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7569 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7570 match(Set dst (MulVI src1 src2)); 7571 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7572 ins_encode %{ 7573 int vector_len = 1; 7574 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7575 %} 7576 ins_pipe( pipe_slow ); 7577 %} 7578 7579 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7580 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7581 match(Set dst (MulVI src (LoadVector mem))); 7582 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7583 ins_encode %{ 7584 int vector_len = 1; 7585 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7586 %} 7587 ins_pipe( pipe_slow ); 7588 %} 7589 7590 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7591 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7592 match(Set dst (MulVI src1 src2)); 7593 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 7594 ins_encode %{ 7595 int vector_len = 2; 7596 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7597 %} 7598 ins_pipe( pipe_slow ); 7599 %} 7600 7601 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7602 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7603 match(Set dst (MulVI src (LoadVector mem))); 7604 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7605 ins_encode %{ 7606 int vector_len = 2; 7607 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7608 %} 7609 ins_pipe( pipe_slow ); 7610 %} 7611 7612 // Floats vector mul 7613 instruct vmul2F(vecD dst, vecD src) %{ 7614 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7615 match(Set dst (MulVF dst src)); 7616 format %{ "mulps $dst,$src\t! mul packed2F" %} 7617 ins_encode %{ 7618 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7619 %} 7620 ins_pipe( pipe_slow ); 7621 %} 7622 7623 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7624 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7625 match(Set dst (MulVF src1 src2)); 7626 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7627 ins_encode %{ 7628 int vector_len = 0; 7629 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7630 %} 7631 ins_pipe( pipe_slow ); 7632 %} 7633 7634 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7635 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7636 match(Set dst (MulVF src (LoadVector mem))); 7637 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7638 ins_encode %{ 7639 int vector_len = 0; 7640 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7641 %} 7642 ins_pipe( pipe_slow ); 7643 %} 7644 7645 instruct vmul4F(vecX dst, vecX src) %{ 7646 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7647 match(Set dst (MulVF dst src)); 7648 format %{ "mulps $dst,$src\t! mul packed4F" %} 7649 ins_encode %{ 7650 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7651 %} 7652 ins_pipe( pipe_slow ); 7653 %} 7654 7655 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7656 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7657 match(Set dst (MulVF src1 src2)); 7658 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7659 ins_encode %{ 7660 int vector_len = 0; 7661 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7662 %} 7663 ins_pipe( pipe_slow ); 7664 %} 7665 7666 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 7667 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7668 match(Set dst (MulVF src (LoadVector mem))); 7669 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 7670 ins_encode %{ 7671 int vector_len = 0; 7672 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7673 %} 7674 ins_pipe( pipe_slow ); 7675 %} 7676 7677 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 7678 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7679 match(Set dst (MulVF src1 src2)); 7680 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 7681 ins_encode %{ 7682 int vector_len = 1; 7683 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7684 %} 7685 ins_pipe( pipe_slow ); 7686 %} 7687 7688 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 7689 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7690 match(Set dst (MulVF src (LoadVector mem))); 7691 format %{ "vmulps $dst,$src,$mem\t! 
mul packed8F" %} 7692 ins_encode %{ 7693 int vector_len = 1; 7694 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7695 %} 7696 ins_pipe( pipe_slow ); 7697 %} 7698 7699 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7700 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7701 match(Set dst (MulVF src1 src2)); 7702 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 7703 ins_encode %{ 7704 int vector_len = 2; 7705 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7706 %} 7707 ins_pipe( pipe_slow ); 7708 %} 7709 7710 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 7711 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7712 match(Set dst (MulVF src (LoadVector mem))); 7713 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 7714 ins_encode %{ 7715 int vector_len = 2; 7716 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7717 %} 7718 ins_pipe( pipe_slow ); 7719 %} 7720 7721 // Doubles vector mul 7722 instruct vmul2D(vecX dst, vecX src) %{ 7723 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7724 match(Set dst (MulVD dst src)); 7725 format %{ "mulpd $dst,$src\t! mul packed2D" %} 7726 ins_encode %{ 7727 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 7728 %} 7729 ins_pipe( pipe_slow ); 7730 %} 7731 7732 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 7733 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7734 match(Set dst (MulVD src1 src2)); 7735 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 7736 ins_encode %{ 7737 int vector_len = 0; 7738 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7739 %} 7740 ins_pipe( pipe_slow ); 7741 %} 7742 7743 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 7744 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7745 match(Set dst (MulVD src (LoadVector mem))); 7746 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 7747 ins_encode %{ 7748 int vector_len = 0; 7749 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7750 %} 7751 ins_pipe( pipe_slow ); 7752 %} 7753 7754 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 7755 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7756 match(Set dst (MulVD src1 src2)); 7757 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 7758 ins_encode %{ 7759 int vector_len = 1; 7760 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7761 %} 7762 ins_pipe( pipe_slow ); 7763 %} 7764 7765 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 7766 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7767 match(Set dst (MulVD src (LoadVector mem))); 7768 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 7769 ins_encode %{ 7770 int vector_len = 1; 7771 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7772 %} 7773 ins_pipe( pipe_slow ); 7774 %} 7775 7776 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7777 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7778 match(Set dst (MulVD src1 src2)); 7779 format %{ "vmulpd $dst k0,$src1,$src2\t! 
mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
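// Note: the two vcmov patterns above implement vector conditional move as a
// compare-then-blend pair: cmpps/cmppd leaves an all-ones or all-zeroes mask
// in each lane of $dst, and blendvps/blendvpd then picks each lane from $src1
// or $src2 under that mask. A rough per-lane sketch of the intent (an
// illustration only, not generated code; the lane/compare names are made up):
//
//   // for (int i = 0; i < lanes; i++) {
//   //   mask[i] = compare(src1[i], src2[i], cond) ? ~0 : 0;
//   //   dst[i]  = (mask[i] != 0) ? src2[i] : src1[i];
//   // }
//
// This is also why the patterns need effect(TEMP dst): $dst holds the
// intermediate compare mask before the blend overwrites it with the result.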
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
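// Note: the SSE/AVX packed shifts (psllw/pslld/psllq and the psrl/psra forms)
// read their count from the low bits of an xmm operand, so the single movdl
// above serves every shift direction and element size. Roughly, the matched
// sequence amounts to the following (an illustration only, with arbitrary
// register names, not generated code):
//
//   movd  xmm1, ecx      // count zero-extended into the low lane of xmm1
//   psllw xmm0, xmm1     // every 16-bit lane of xmm0 shifted by that count
//
// which is why LShiftCntV and RShiftCntV can share this one pattern.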
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t!
sqrt packed8F" %} 8160 ins_encode %{ 8161 int vector_len = 1; 8162 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8163 %} 8164 ins_pipe( pipe_slow ); 8165 %} 8166 8167 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 8168 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8169 match(Set dst (SqrtVF src)); 8170 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8171 ins_encode %{ 8172 int vector_len = 2; 8173 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8174 %} 8175 ins_pipe( pipe_slow ); 8176 %} 8177 8178 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8179 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8180 match(Set dst (SqrtVF (LoadVector mem))); 8181 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8182 ins_encode %{ 8183 int vector_len = 2; 8184 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8185 %} 8186 ins_pipe( pipe_slow ); 8187 %} 8188 8189 // ------------------------------ LeftShift ----------------------------------- 8190 8191 // Shorts/Chars vector left shift 8192 instruct vsll2S(vecS dst, vecS shift) %{ 8193 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8194 match(Set dst (LShiftVS dst shift)); 8195 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8196 ins_encode %{ 8197 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8198 %} 8199 ins_pipe( pipe_slow ); 8200 %} 8201 8202 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8203 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8204 match(Set dst (LShiftVS dst shift)); 8205 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8206 ins_encode %{ 8207 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8208 %} 8209 ins_pipe( pipe_slow ); 8210 %} 8211 8212 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 8213 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8214 match(Set dst (LShiftVS src shift)); 8215 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8216 ins_encode %{ 8217 int vector_len = 0; 8218 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8219 %} 8220 ins_pipe( pipe_slow ); 8221 %} 8222 8223 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8224 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8225 match(Set dst (LShiftVS src shift)); 8226 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8227 ins_encode %{ 8228 int vector_len = 0; 8229 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8230 %} 8231 ins_pipe( pipe_slow ); 8232 %} 8233 8234 instruct vsll4S(vecD dst, vecS shift) %{ 8235 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8236 match(Set dst (LShiftVS dst shift)); 8237 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8238 ins_encode %{ 8239 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8240 %} 8241 ins_pipe( pipe_slow ); 8242 %} 8243 8244 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8245 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8246 match(Set dst (LShiftVS dst shift)); 8247 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8248 ins_encode %{ 8249 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8250 %} 8251 ins_pipe( pipe_slow ); 8252 %} 8253 8254 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 8255 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8256 match(Set dst (LShiftVS src shift)); 8257 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 8258 ins_encode %{ 8259 int vector_len = 0; 8260 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8261 %} 8262 ins_pipe( pipe_slow ); 8263 %} 8264 8265 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8266 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8267 match(Set dst (LShiftVS src shift)); 8268 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8269 ins_encode %{ 8270 int vector_len = 0; 8271 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8272 %} 8273 ins_pipe( pipe_slow ); 8274 %} 8275 8276 instruct vsll8S(vecX dst, vecS shift) %{ 8277 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8278 match(Set dst (LShiftVS dst shift)); 8279 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8280 ins_encode %{ 8281 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8282 %} 8283 ins_pipe( pipe_slow ); 8284 %} 8285 8286 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 8287 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8288 match(Set dst (LShiftVS dst shift)); 8289 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8290 ins_encode %{ 8291 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8292 %} 8293 ins_pipe( pipe_slow ); 8294 %} 8295 8296 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 8297 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8298 match(Set dst (LShiftVS src shift)); 8299 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8300 ins_encode %{ 8301 int vector_len = 0; 8302 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8303 %} 8304 ins_pipe( pipe_slow ); 8305 %} 8306 8307 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8308 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8309 match(Set dst (LShiftVS src shift)); 8310 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8311 ins_encode %{ 8312 int vector_len = 0; 8313 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8314 %} 8315 ins_pipe( pipe_slow ); 8316 %} 8317 8318 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 8319 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8320 match(Set dst (LShiftVS src shift)); 8321 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8322 ins_encode %{ 8323 int vector_len = 1; 8324 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8325 %} 8326 ins_pipe( pipe_slow ); 8327 %} 8328 8329 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8330 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8331 match(Set dst (LShiftVS src shift)); 8332 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8333 ins_encode %{ 8334 int vector_len = 1; 8335 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8336 %} 8337 ins_pipe( pipe_slow ); 8338 %} 8339 8340 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8341 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8342 match(Set dst (LShiftVS src shift)); 8343 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed32S" %} 8344 ins_encode %{ 8345 int vector_len = 2; 8346 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8347 %} 8348 ins_pipe( pipe_slow ); 8349 %} 8350 8351 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8352 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8353 match(Set dst (LShiftVS src shift)); 8354 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 8355 ins_encode %{ 8356 int vector_len = 2; 8357 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8358 %} 8359 ins_pipe( pipe_slow ); 8360 %} 8361 8362 // Integers vector left shift 8363 instruct vsll2I(vecD dst, vecS shift) %{ 8364 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8365 match(Set dst (LShiftVI dst shift)); 8366 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8367 ins_encode %{ 8368 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8369 %} 8370 ins_pipe( pipe_slow ); 8371 %} 8372 8373 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 8374 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8375 match(Set dst (LShiftVI dst shift)); 8376 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8377 ins_encode %{ 8378 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8379 %} 8380 ins_pipe( pipe_slow ); 8381 %} 8382 8383 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 8384 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8385 match(Set dst (LShiftVI src shift)); 8386 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8387 ins_encode %{ 8388 int vector_len = 0; 8389 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8390 %} 8391 ins_pipe( pipe_slow ); 8392 %} 8393 8394 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8395 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8396 match(Set dst (LShiftVI src shift)); 8397 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8398 ins_encode %{ 8399 int vector_len = 0; 8400 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8401 %} 8402 ins_pipe( pipe_slow ); 8403 %} 8404 8405 instruct vsll4I(vecX dst, vecS shift) %{ 8406 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8407 match(Set dst (LShiftVI dst shift)); 8408 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8409 ins_encode %{ 8410 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8411 %} 8412 ins_pipe( pipe_slow ); 8413 %} 8414 8415 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 8416 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8417 match(Set dst (LShiftVI dst shift)); 8418 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8419 ins_encode %{ 8420 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8421 %} 8422 ins_pipe( pipe_slow ); 8423 %} 8424 8425 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 8426 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8427 match(Set dst (LShiftVI src shift)); 8428 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 8429 ins_encode %{ 8430 int vector_len = 0; 8431 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8432 %} 8433 ins_pipe( pipe_slow ); 8434 %} 8435 8436 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8437 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8438 match(Set dst (LShiftVI src shift)); 8439 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 8440 ins_encode %{ 8441 int vector_len = 0; 8442 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8443 %} 8444 ins_pipe( pipe_slow ); 8445 %} 8446 8447 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 8448 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8449 match(Set dst (LShiftVI src shift)); 8450 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 8451 ins_encode %{ 8452 int vector_len = 1; 8453 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8454 %} 8455 ins_pipe( pipe_slow ); 8456 %} 8457 8458 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8459 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8460 match(Set dst (LShiftVI src shift)); 8461 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 8462 ins_encode %{ 8463 int vector_len = 1; 8464 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8465 %} 8466 ins_pipe( pipe_slow ); 8467 %} 8468 8469 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8470 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8471 match(Set dst (LShiftVI src shift)); 8472 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 8473 ins_encode %{ 8474 int vector_len = 2; 8475 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8476 %} 8477 ins_pipe( pipe_slow ); 8478 %} 8479 8480 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8481 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8482 match(Set dst (LShiftVI src shift)); 8483 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 8484 ins_encode %{ 8485 int vector_len = 2; 8486 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8487 %} 8488 ins_pipe( pipe_slow ); 8489 %} 8490 8491 // Longs vector left shift 8492 instruct vsll2L(vecX dst, vecS shift) %{ 8493 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8494 match(Set dst (LShiftVL dst shift)); 8495 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 8496 ins_encode %{ 8497 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 8498 %} 8499 ins_pipe( pipe_slow ); 8500 %} 8501 8502 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 8503 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8504 match(Set dst (LShiftVL dst shift)); 8505 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 8506 ins_encode %{ 8507 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 8508 %} 8509 ins_pipe( pipe_slow ); 8510 %} 8511 8512 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 8513 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8514 match(Set dst (LShiftVL src shift)); 8515 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 8516 ins_encode %{ 8517 int vector_len = 0; 8518 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8519 %} 8520 ins_pipe( pipe_slow ); 8521 %} 8522 8523 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8524 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8525 match(Set dst (LShiftVL src shift)); 8526 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data because Java code converts a short value into an int with
// sign extension before the shift. Char vectors are fine, though, since
// chars are unsigned values.
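// For example (a sketch of the Java semantics only, not generated code):
//
//   short s = -1;        // bit pattern 0xFFFF
//   int   r = s >>> 4;   // s widens to 0xFFFFFFFF first, so r == 0x0FFFFFFF
//                        // and (short)r == (short)0xFFFF, i.e. still -1
//
// whereas psrlw on a 16-bit lane holding 0xFFFF yields 0x0FFF. A char
// (0xFFFF) widens without sign extension, so c >>> 4 gives 0x0FFF in the
// low 16 bits, matching the vector instruction.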
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t!
logical right shift packed16S" %} 8715 ins_encode %{ 8716 int vector_len = 1; 8717 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8718 %} 8719 ins_pipe( pipe_slow ); 8720 %} 8721 8722 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8723 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8724 match(Set dst (URShiftVS src shift)); 8725 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 8726 ins_encode %{ 8727 int vector_len = 1; 8728 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8729 %} 8730 ins_pipe( pipe_slow ); 8731 %} 8732 8733 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8734 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8735 match(Set dst (URShiftVS src shift)); 8736 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8737 ins_encode %{ 8738 int vector_len = 2; 8739 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8740 %} 8741 ins_pipe( pipe_slow ); 8742 %} 8743 8744 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8745 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8746 match(Set dst (URShiftVS src shift)); 8747 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8748 ins_encode %{ 8749 int vector_len = 2; 8750 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8751 %} 8752 ins_pipe( pipe_slow ); 8753 %} 8754 8755 // Integers vector logical right shift 8756 instruct vsrl2I(vecD dst, vecS shift) %{ 8757 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8758 match(Set dst (URShiftVI dst shift)); 8759 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8760 ins_encode %{ 8761 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8762 %} 8763 ins_pipe( pipe_slow ); 8764 %} 8765 8766 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 8767 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8768 match(Set dst (URShiftVI dst shift)); 8769 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8770 ins_encode %{ 8771 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8772 %} 8773 ins_pipe( pipe_slow ); 8774 %} 8775 8776 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 8777 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8778 match(Set dst (URShiftVI src shift)); 8779 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8780 ins_encode %{ 8781 int vector_len = 0; 8782 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8783 %} 8784 ins_pipe( pipe_slow ); 8785 %} 8786 8787 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8788 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8789 match(Set dst (URShiftVI src shift)); 8790 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8791 ins_encode %{ 8792 int vector_len = 0; 8793 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8794 %} 8795 ins_pipe( pipe_slow ); 8796 %} 8797 8798 instruct vsrl4I(vecX dst, vecS shift) %{ 8799 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8800 match(Set dst (URShiftVI dst shift)); 8801 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 8802 ins_encode %{ 8803 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8804 %} 8805 ins_pipe( pipe_slow ); 8806 %} 8807 8808 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 8809 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8810 match(Set dst (URShiftVI dst shift)); 8811 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 8812 ins_encode %{ 8813 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8814 %} 8815 ins_pipe( pipe_slow ); 8816 %} 8817 8818 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 8819 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8820 match(Set dst (URShiftVI src shift)); 8821 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 8822 ins_encode %{ 8823 int vector_len = 0; 8824 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8825 %} 8826 ins_pipe( pipe_slow ); 8827 %} 8828 8829 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8830 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8831 match(Set dst (URShiftVI src shift)); 8832 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 8833 ins_encode %{ 8834 int vector_len = 0; 8835 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8836 %} 8837 ins_pipe( pipe_slow ); 8838 %} 8839 8840 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 8841 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8842 match(Set dst (URShiftVI src shift)); 8843 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8844 ins_encode %{ 8845 int vector_len = 1; 8846 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8847 %} 8848 ins_pipe( pipe_slow ); 8849 %} 8850 8851 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8852 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8853 match(Set dst (URShiftVI src shift)); 8854 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8855 ins_encode %{ 8856 int vector_len = 1; 8857 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8858 %} 8859 ins_pipe( pipe_slow ); 8860 %} 8861 8862 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8863 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8864 match(Set dst (URShiftVI src shift)); 8865 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8866 ins_encode %{ 8867 int vector_len = 2; 8868 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8869 %} 8870 ins_pipe( pipe_slow ); 8871 %} 8872 8873 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8874 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8875 match(Set dst (URShiftVI src shift)); 8876 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8877 ins_encode %{ 8878 int vector_len = 2; 8879 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8880 %} 8881 ins_pipe( pipe_slow ); 8882 %} 8883 8884 // Longs vector logical right shift 8885 instruct vsrl2L(vecX dst, vecS shift) %{ 8886 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8887 match(Set dst (URShiftVL dst shift)); 8888 format %{ "psrlq $dst,$shift\t! 

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
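
// The arithmetic right shifts below differ from the logical shifts above
// only for negative lanes. An illustrative C++ contrast (assuming the
// usual arithmetic behavior of >> on signed values):
//
//   int32_t  x   = -8;                 // 0xFFFFFFF8
//   int32_t  sra = x >> 1;             // -4 (0xFFFFFFFC): sign bit replicated
//   uint32_t srl = (uint32_t)x >> 1;   // 0x7FFFFFFC: zero bit shifted in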

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift instructions for longs.
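// Were one needed, the result can be synthesized from a logical shift;
// an illustrative C++ sketch (not matcher code), valid for 0 <= n <= 63:
//
//   static inline int64_t sra64(int64_t x, int n) {
//     uint64_t t = (uint64_t)x >> n;   // logical shift
//     uint64_t m = 1ULL << (63 - n);   // bit where the sign bit landed
//     return (int64_t)((t ^ m) - m);   // sign-extend from that bit
//   }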

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
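
// The bitwise patterns in this and the following sections are what
// superword vectorization produces for element-wise loops; an
// illustrative C++ equivalent of the AND case:
//
//   void and_ints(int* r, const int* a, const int* b, int n) {
//     for (int i = 0; i < n; i++) {
//       r[i] = a[i] & b[i];   // packed lanes become a single AndV node
//     }
//   }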

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
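
// Aside: XOR of a register with itself is the usual vector zeroing idiom,
// e.g. (illustrative use of the same assembler call as above):
//
//   __ vpxor(xmm0, xmm0, xmm0, vector_len);  // zero every lane of xmm0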

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
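
// Per lane, FmaVD/FmaVF are fused multiply-adds: a * b + c is computed
// with a single rounding step. An illustrative scalar C++ equivalent:
//
//   #include <cmath>
//   double fma_lane(double a, double b, double c) {
//     return std::fma(a, b, c);   // one rounding, unlike a*b followed by +c
//   }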

// --------------------------------- Vector Multiply Add --------------------------------------

instruct smuladd4S2I_reg(vecD dst, vecD src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct smuladd8S4I_reg(vecX dst, vecX src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
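
// pmaddwd/vpmaddwd multiplies adjacent signed 16-bit lanes and sums each
// pair into one 32-bit lane; an illustrative C++ sketch of output lane i:
//
//   int32_t muladd_lane(const int16_t* a, const int16_t* b, int i) {
//     return (int32_t)a[2*i] * b[2*i] + (int32_t)a[2*i + 1] * b[2*i + 1];
//   }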

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
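
// evpdpwssd (AVX-512 VNNI) fuses the pmaddwd pair-product with the add
// into the accumulator; per 32-bit lane it behaves like this illustrative
// C++ sketch (no saturation):
//
//   void vnni_lane(int32_t* acc, const int16_t* a, const int16_t* b, int i) {
//     acc[i] += (int32_t)a[2*i] * b[2*i] + (int32_t)a[2*i + 1] * b[2*i + 1];
//   }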

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
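
// vpopcntd counts the set bits in each 32-bit lane; an illustrative C++
// sketch of the per-lane result (GCC/Clang builtin):
//
//   int popcount_lane(uint32_t x) {
//     return __builtin_popcount(x);   // number of 1 bits in the lane
//   }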