//
// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
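//
// To make the word-slot scheme above concrete (an illustrative sketch, not
// part of the generated tables): each 32-bit word of an XMM/ZMM register is
// exposed to the allocator as a consecutive VMReg slot, so word (a) is the
// register's base VMReg and word (k), slot index 10, of e.g. xmm0 would be
// reached in C++ as:
//
//   VMReg word_a = xmm0->as_VMReg();            // word (a), holds a Float
//   VMReg word_k = xmm0->as_VMReg()->next(10);  // word (k), 10 slots later
//
// This is exactly the xmmN->as_VMReg()->next(i) pattern that every reg_def
// entry below follows.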
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
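
// A note on the reg_class_dynamic entries further below (a sketch of the
// mechanism as it is used in this file, not general ADLC documentation):
// each one names an EVEX class, a legacy class, and a C++ predicate, e.g.
//
//   reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy,
//                               %{ VM_Version::supports_evex() %} );
//
// so float_reg resolves to the XMM0-XMM31 class when the CPU supports EVEX
// encodings and to the XMM0-XMM15 class otherwise.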

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre-EVEX 32-bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for EVEX 32-bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 64-bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for EVEX 64-bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 128-bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for EVEX 128-bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre-EVEX 256-bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for EVEX 256-bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for EVEX 512-bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1102 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1103 1104 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1105 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1106 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1107 1108 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1109 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1110 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1111 1112 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1113 reg_class ymm3_reg(XMM3, XMM3b, 
XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1114 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1115 1116 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1117 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1118 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1119 1120 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1121 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1122 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1123 1124 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1125 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1126 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1127 1128 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1129 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1130 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1131 1132 #ifdef _LP64 1133 1134 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1135 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1136 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1137 1138 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1139 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1140 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1141 1142 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1143 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1144 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1145 1146 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1147 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1148 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1149 1150 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1151 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1152 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1153 1154 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1155 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1156 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1157 1158 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1159 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1160 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1161 1162 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1163 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1164 reg_class zmm15_reg(XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1165 1166 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1167 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1168 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1169 1170 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1171 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1172 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1173 1174 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1175 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1176 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1177 1178 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1179 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1180 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1181 1182 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1183 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1184 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1185 1186 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1187 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1188 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1189 1190 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1191 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1192 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1193 1194 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1195 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1196 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1197 1198 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1199 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1200 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1201 1202 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1203 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1204 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1205 1206 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1207 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1208 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1209 1210 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1211 reg_class ymm27_reg(XMM27, 
reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // Push the value of "the_pc" on the stack without destroying any registers,
  // since they may all be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}
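
// For reference, one plausible byte accounting for the 64-bit handler above,
// assuming rel32 forms for the call and the final jump (the 15+3 budget in
// size_deopt_handler() leaves slack for the exact encodings chosen):
//
//   call next              ; 5 bytes - pushes the address of 'next'
//   sub  qword [rsp], imm  ; 8 bytes - rewinds the pushed address to the_pc
//   jmp  deopt_blob unpack ; 5 bytes - tail-call into the deopt blob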

//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
    case Op_MulAddVS2VI:
      if (UseSSE < 2)
        ret_value = false;
      break;
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // Identify extra cases that we might want to provide match rules for,
  // e.g. Op_ vector nodes and other intrinsics, while guarding with vlen.
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}
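
// Worked examples of the sizing functions above: with UseAVX == 2,
// vector_width_in_bytes(T_INT) is (1 << 2) * 8 = 32, so
// max_vector_size(T_INT) = 32 / 4 = 8 lanes and min_vector_size(T_INT) = 2.
// For T_BYTE with UseAVX > 2 but no avx512bw, the width is capped at 32,
// giving at most 32 byte lanes and (since type2aelembytes == 1) at least 4.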

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}
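
// For example, an array access of the form
//   base + (ConvI2L(index) << 3) + offset
// has its AddP chain, the shift, and (on LP64, when the index is provably
// non-negative) the ConvI2L marked address-visited by the code above, so the
// matcher can fold the whole expression into a single addressing mode such as
//   mov rax, [base + index*8 + offset]
// rather than computing the scaled index into a separate register.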

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
#ifndef _LP64
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
        __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecY:
#ifndef _LP64
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
        __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
      }
#endif
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextracti32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextracti64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2 ) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}
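
// A worked example of the size estimate above, assuming EVEX disp8*N
// compression semantics: an Op_VecZ spill (tuple EVEX_FVM, vec_len == 2)
// whose stack_offset is a multiple of the 64-byte vector width (64, 128, ...)
// compresses to a one-byte displacement, while a misaligned offset such as
// 100 needs the full four-byte form, so offset_size is 1 vs. 4.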

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}
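
// For example:
//   replicate4_imm(0x8F, 1)   == 0x8F8F8F8F          (byte  -> 32 bits)
//   replicate4_imm(0xBEEF, 2) == 0xBEEFBEEF          (short -> 32 bits)
//   replicate8_imm(-1, 1)     == 0xFFFFFFFFFFFFFFFF  (sign bits are masked
//                                                     off before widening)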

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_vl));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without the next two lines with error:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
    overflow     (0x20, "o");  // not really supported by the instruction
    no_overflow  (0x21, "no"); // not really supported by the instruction
  %}
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "ud2\t# ShouldNotReachHere" %}
  ins_encode %{
    __ ud2();
  %}
  ins_pipe(pipe_slow);
%}

// =================================EVEX special===============================

instruct setMask(rRegI dst, rRegI src) %{
  predicate(Matcher::has_predicated_vectors());
  match(Set dst (SetVectMaskI src));
  effect(TEMP dst);
  format %{ "setvectmask $dst, $src" %}
  ins_encode %{
    __ setvectmask($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
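
// The SSE forms above are destructive two-operand instructions, so dst is
// also the left input and the rules match (AddF dst src). The AVX forms
// below use the three-operand VEX encoding, letting the allocator choose a
// destination independent of both sources.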

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
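
// Note that abs and neg above are implemented as bitwise and/xor against
// sign-mask constants rather than arithmetic, which flips or clears only the
// sign bit and therefore also behaves predictably for NaNs and infinities.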

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
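
// Note: FmaD/FmaF match the fused form c = a * b + c with a single rounding
// step. The addend is also the destination, which lines up with the
// multiply-add-231 operand pattern; the fmad()/fmaf() macro-assembler
// helpers are assumed here to emit an FMA3 instruction of that shape
// (e.g. vfmadd231sd/vfmadd231ss) when UseFMA is set.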

// ====================VECTOR INSTRUCTIONS=====================================


// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (4 bytes long)
instruct MoveVecS2Leg(legVecS dst, vecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (4 bytes long)
instruct MoveLeg2VecS(vecS dst, legVecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (8 bytes long)
instruct MoveVecD2Leg(legVecD dst, vecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (8 bytes long)
instruct MoveLeg2VecD(vecD dst, legVecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (16 bytes long)
instruct MoveVecX2Leg(legVecX dst, vecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (16 bytes long)
instruct MoveLeg2VecX(vecX dst, legVecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
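
// Note on the Move*2Leg / MoveLeg2* pairs: the legVec* operand classes are
// restricted to the legacy registers XMM0-XMM15. On AVX-512 parts without
// the VL extension, XMM16-XMM31 can only be encoded by full-width (512-bit)
// EVEX instructions, so the copies above fall back to evmovdquq with
// vector_len = 2 in that case. Throughout this file vector_len follows the
// assembler's convention: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit
// (Assembler::AVX_128bit / AVX_256bit / AVX_512bit).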

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (32 bytes long)
instruct MoveVecY2Leg(legVecY dst, vecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (32 bytes long)
instruct MoveLeg2VecY(vecY dst, legVecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
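
// loadV64_dword vs. loadV64_qword (and the matching stores below) differ
// only in the element granularity of the EVEX encoding: vmovdqu32 for
// element sizes <= 4 bytes, vmovdqu64 otherwise. Unmasked, both simply move
// the full 64 bytes; the granularity only becomes observable under EVEX
// merge-masking.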

instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE=======================================
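
// The rules in this section are guarded by !VM_Version::supports_avx512vlbw()
// (or !supports_avx512vl() for the int/long/float/double forms), so they
// never overlap the EVEX REPLICATE section further down. Each rule widens a
// scalar in steps: build a 128-bit broadcast with shuffles/unpacks, then
// double it with vinserti128_high (to 256 bits) and vinserti64x4 (to 512
// bits). For example, to broadcast a byte scalar held in a GPR:
//   movd       xmm,reg      // scalar byte in lane 0
//   punpcklbw  xmm,xmm      // duplicate the byte into a 16-bit word
//   pshuflw    xmm,xmm,0x00 // broadcast that word across the low 4 words
//   punpcklqdq xmm,xmm      // duplicate the low qword -> 16 identical bytes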

instruct Repl4B_mem(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B(legVecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
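
// The *_imm replicate rules below use the replicate4_imm/replicate8_imm
// helpers to pre-replicate the constant at assembly time:
// replicate8_imm(con, width) repeats the low `width` bytes of con through a
// 64-bit value, so e.g. replicate8_imm(0x1F, 1) yields 0x1F1F1F1F1F1F1F1F
// and a single movq from the constant table loads eight copies at once.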

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm(legVecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
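
// pshuflw can only shuffle within the low 64 bits, which is why the short
// replicates above need a following punpcklqdq to fill the high qword. The
// int replicates further below use pshufd instead, which addresses all four
// dwords directly, so a single shuffle already yields the full 128-bit
// broadcast.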

instruct Repl32S(legVecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm(legVecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I(legVecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm(legVecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
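
// Note: on 64-bit (_LP64) a long scalar arrives in a single GPR and can be
// moved to an XMM register with one movdq. On 32-bit the long lives in a
// GPR pair, so the #else variants below assemble it from two movdl
// transfers (HIGH_FROM_LOW selects the register holding the high half)
// joined with punpckldq before the qword is broadcast.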

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L(legVecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm(legVecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F(legVecZ dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
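
// The *_zero replicate rules here and below use the classic xor-with-self
// idiom: xorps/xorpd/vxorps/vpxor of a register with itself is recognized
// by the hardware as a zeroing idiom, so it breaks any dependency on the
// old register value and needs no constant load.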

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D(legVecZ dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================
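
// The rules in this section carry no CPU-feature predicate beyond the
// vector length; they are intended as the SSE2 baseline forms that apply
// when none of the more specific LEGACY/EVEX replicate rules match.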

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; vector_len == 1 selects the 256-bit encoding.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; vector_len == 1 selects the 256-bit encoding.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; vector_len == 1 selects the 256-bit encoding.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; vector_len == 1 selects the 256-bit encoding.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================
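
// With AVX-512 (and the VL/BW extensions where the predicates require them),
// a single evpbroadcastb/w/d from a general register, or vpbroadcastb/w/d
// from memory, replaces the multi-instruction shuffle sequences of the
// legacy section; vector_len again selects 128/256/512-bit operation.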

instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "evpbroadcastb $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // 512-bit vpxor is available with EVEX; vector_len == 2 selects it.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
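
// The *_imm_evex patterns above materialize a replicated constant through
// the constant table: replicate8_imm(con, width) is expected to tile the
// low 'width' bytes of the immediate across a 64-bit image. Illustrative
// sketch (assumed value) for a 16-bit constant con = 0x1234:
//
//   replicate8_imm(0x1234, 2)                   // -> 0x1234123412341234
//   movq         xmm_dst, [constant table slot] // load the 64-bit image
//   vpbroadcastw dst, xmm_dst                   // broadcast to all lanes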

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "evpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "evpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
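
// On 32-bit (the #else branch above) there is no 64-bit GPR, so the long
// scalar arrives as a lo/hi register pair and is first reassembled in an
// XMM register before the broadcast. Sketch of the idea (lane values shown
// as 32-bit words, illustrative only):
//
//   movdl        dst, src.lo   // dst = [lo,  0, 0, 0]
//   movdl        tmp, src.hi   // tmp = [hi,  0, 0, 0]
//   punpckldq    dst, tmp      // dst = [lo, hi, 0, 0] = the 64-bit value
//   vpbroadcastq dst, dst      // replicate that quadword to every lane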

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================
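
// All of the reductions below follow the same shape: fold the upper half of
// the vector onto the lower half (vextract*/pshufd plus an add or mul) until
// a single lane remains, then combine that lane with the scalar input src1.
// Scalar equivalent (illustrative only) of an N-lane int add reduction:
//
//   int acc = src1;
//   for (int i = 0; i < N; i++) acc += src2[i];
//   dst = acc;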

instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
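
// The pshufd immediates used by these reductions encode one 2-bit source
// lane selector per destination lane (low bits first). The two selectors
// that appear throughout:
//   0x01 -> lane 1 into lane 0           (folds the upper 32 bits down)
//   0x0E -> lanes 2,3 into lanes 0,1     (folds the upper 64 bits down)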

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextracti128_high $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
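
// The FP reductions below deliberately accumulate with scalar addss/addsd
// in lane order instead of packed horizontal adds: FP addition is not
// associative, so reassociating the sum could change the result. Scalar
// equivalent (illustrative) of the 2F case, where dst doubles as the
// scalar input:
//
//   float acc = dst;
//   acc += src2[0];
//   acc += src2[1];
//   dst = acc;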

instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "addss $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf128 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction4D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t! add reduction8D" %}
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pmulld $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pmulld $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
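
// Note the stricter predicate on the SSE multiply reductions above:
// UseSSE > 3 (SSE4.1), because pmulld only exists from SSE4.1 on, whereas
// the add reductions get by with UseSSE > 2 (SSSE3) for phaddd. Scalar
// equivalent (illustrative) of the 2I case:
//
//   int acc = src1;
//   acc *= src2[0];
//   acc *= src2[1];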

instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 1);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmullq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpmullq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
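
// vpmullq (element-wise 64x64->64 multiply) is an AVX-512DQ instruction,
// hence the supports_avx512dq() check in the long multiply reductions
// above; without DQ there is no single packed long multiply to build the
// reduction from. Scalar equivalent (illustrative) of the 2L case:
//
//   long acc = src1;
//   acc *= src2[0];
//   acc *= src2[1];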

instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
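// The pshufd immediates used throughout these reductions select source
// dwords: 0x01/0x02/0x03 move float lane 1/2/3 down into lane 0, and 0xE
// (0b1110) moves the upper 64 bits (a double or a long) down to the bottom,
// which is why the 2D/2L rules shuffle with 0xE before the final scalar
// multiply.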

instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
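// The 8D form below walks the 512-bit register 128 bits at a time with
// vextractf32x4 imm 0x1..0x3 (the instruction moves a whole 128-bit lane,
// so it works for doubles despite the f32x4 name), reducing two doubles
// per extracted lane.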

instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
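// In the AVX encodings below, vector_len selects the encoded vector width:
// 0 = 128-bit (xmm), 1 = 256-bit (ymm), 2 = 512-bit (zmm). The predicates
// pair it with the required ISA level: UseAVX > 0 for 128-bit VEX forms,
// UseAVX > 1 (AVX2) for 256-bit integer ops, UseAVX > 2 (AVX-512) for zmm.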

instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
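// A hedged sketch of the Java loop shape SuperWord turns into AddVB nodes,
// which the byte rules above then match (array names are illustrative):
//
//   byte[] a, b;
//   for (int i = 0; i < a.length; i++) {
//     a[i] = (byte)(a[i] + b[i]);   // AddVB over 4/8/16/32/64 lanes
//   }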

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
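// The *_mem variants match (AddVS src (LoadVector mem)) so that a vector
// load feeding the add is folded into the instruction's memory operand,
// saving a register and a separate load; the same pattern recurs for every
// operation in this file.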

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
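// Note the asymmetry in the 512-bit predicates: the byte/short forms
// (packed64B, packed32S) additionally require VM_Version::supports_avx512bw(),
// because AVX-512BW is what extends byte/word operations to zmm registers,
// while dword/qword adds are already covered by AVX-512F (plain UseAVX > 2).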

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
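// Subtraction is not commutative, so unlike the add rules the operand order
// in (SubVB src1 src2) is fixed: psubb/vpsubb compute dst = src1 - src2
// lane-wise, and only the second operand may be folded from memory.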

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
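// pmullw/vpmullw keep the low 16 bits of each 16x16-bit product, which is
// exactly Java's wrap-around result once the int product is cast back to
// short/char. Illustrative loop shape (hedged, array names made up):
//
//   short[] a, b;
//   for (int i = 0; i < a.length; i++) {
//     a[i] = (short)(a[i] * b[i]);   // MulVS, truncating multiply
//   }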
mul packed4S" %} 7334 ins_encode %{ 7335 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7336 %} 7337 ins_pipe( pipe_slow ); 7338 %} 7339 7340 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 7341 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7342 match(Set dst (MulVS src1 src2)); 7343 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7344 ins_encode %{ 7345 int vector_len = 0; 7346 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7347 %} 7348 ins_pipe( pipe_slow ); 7349 %} 7350 7351 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 7352 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7353 match(Set dst (MulVS src (LoadVector mem))); 7354 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 7355 ins_encode %{ 7356 int vector_len = 0; 7357 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7358 %} 7359 ins_pipe( pipe_slow ); 7360 %} 7361 7362 instruct vmul8S(vecX dst, vecX src) %{ 7363 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7364 match(Set dst (MulVS dst src)); 7365 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7366 ins_encode %{ 7367 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7368 %} 7369 ins_pipe( pipe_slow ); 7370 %} 7371 7372 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 7373 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7374 match(Set dst (MulVS src1 src2)); 7375 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7376 ins_encode %{ 7377 int vector_len = 0; 7378 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7379 %} 7380 ins_pipe( pipe_slow ); 7381 %} 7382 7383 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 7384 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7385 match(Set dst (MulVS src (LoadVector mem))); 7386 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7387 ins_encode %{ 7388 int vector_len = 0; 7389 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7390 %} 7391 ins_pipe( pipe_slow ); 7392 %} 7393 7394 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 7395 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7396 match(Set dst (MulVS src1 src2)); 7397 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7398 ins_encode %{ 7399 int vector_len = 1; 7400 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7401 %} 7402 ins_pipe( pipe_slow ); 7403 %} 7404 7405 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 7406 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7407 match(Set dst (MulVS src (LoadVector mem))); 7408 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7409 ins_encode %{ 7410 int vector_len = 1; 7411 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7412 %} 7413 ins_pipe( pipe_slow ); 7414 %} 7415 7416 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7417 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7418 match(Set dst (MulVS src1 src2)); 7419 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed32S" %} 7420 ins_encode %{ 7421 int vector_len = 2; 7422 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7423 %} 7424 ins_pipe( pipe_slow ); 7425 %} 7426 7427 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7428 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7429 match(Set dst (MulVS src (LoadVector mem))); 7430 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7431 ins_encode %{ 7432 int vector_len = 2; 7433 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7434 %} 7435 ins_pipe( pipe_slow ); 7436 %} 7437 7438 // Integers vector mul (sse4_1) 7439 instruct vmul2I(vecD dst, vecD src) %{ 7440 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7441 match(Set dst (MulVI dst src)); 7442 format %{ "pmulld $dst,$src\t! mul packed2I" %} 7443 ins_encode %{ 7444 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7445 %} 7446 ins_pipe( pipe_slow ); 7447 %} 7448 7449 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 7450 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7451 match(Set dst (MulVI src1 src2)); 7452 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 7453 ins_encode %{ 7454 int vector_len = 0; 7455 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7456 %} 7457 ins_pipe( pipe_slow ); 7458 %} 7459 7460 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 7461 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7462 match(Set dst (MulVI src (LoadVector mem))); 7463 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 7464 ins_encode %{ 7465 int vector_len = 0; 7466 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7467 %} 7468 ins_pipe( pipe_slow ); 7469 %} 7470 7471 instruct vmul4I(vecX dst, vecX src) %{ 7472 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7473 match(Set dst (MulVI dst src)); 7474 format %{ "pmulld $dst,$src\t! mul packed4I" %} 7475 ins_encode %{ 7476 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7477 %} 7478 ins_pipe( pipe_slow ); 7479 %} 7480 7481 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 7482 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7483 match(Set dst (MulVI src1 src2)); 7484 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 7485 ins_encode %{ 7486 int vector_len = 0; 7487 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7488 %} 7489 ins_pipe( pipe_slow ); 7490 %} 7491 7492 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 7493 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7494 match(Set dst (MulVI src (LoadVector mem))); 7495 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 7496 ins_encode %{ 7497 int vector_len = 0; 7498 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7499 %} 7500 ins_pipe( pipe_slow ); 7501 %} 7502 7503 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 7504 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7505 match(Set dst (MulVL src1 src2)); 7506 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 7507 ins_encode %{ 7508 int vector_len = 0; 7509 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7510 %} 7511 ins_pipe( pipe_slow ); 7512 %} 7513 7514 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 7515 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7516 match(Set dst (MulVL src (LoadVector mem))); 7517 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 7518 ins_encode %{ 7519 int vector_len = 0; 7520 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7521 %} 7522 ins_pipe( pipe_slow ); 7523 %} 7524 7525 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 7526 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7527 match(Set dst (MulVL src1 src2)); 7528 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 7529 ins_encode %{ 7530 int vector_len = 1; 7531 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7532 %} 7533 ins_pipe( pipe_slow ); 7534 %} 7535 7536 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7537 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7538 match(Set dst (MulVL src (LoadVector mem))); 7539 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7540 ins_encode %{ 7541 int vector_len = 1; 7542 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7543 %} 7544 ins_pipe( pipe_slow ); 7545 %} 7546 7547 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7548 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7549 match(Set dst (MulVL src1 src2)); 7550 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 7551 ins_encode %{ 7552 int vector_len = 2; 7553 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7554 %} 7555 ins_pipe( pipe_slow ); 7556 %} 7557 7558 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7559 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7560 match(Set dst (MulVL src (LoadVector mem))); 7561 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7562 ins_encode %{ 7563 int vector_len = 2; 7564 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7565 %} 7566 ins_pipe( pipe_slow ); 7567 %} 7568 7569 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7570 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7571 match(Set dst (MulVI src1 src2)); 7572 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7573 ins_encode %{ 7574 int vector_len = 1; 7575 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7576 %} 7577 ins_pipe( pipe_slow ); 7578 %} 7579 7580 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7581 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7582 match(Set dst (MulVI src (LoadVector mem))); 7583 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7584 ins_encode %{ 7585 int vector_len = 1; 7586 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7587 %} 7588 ins_pipe( pipe_slow ); 7589 %} 7590 7591 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7592 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7593 match(Set dst (MulVI src1 src2)); 7594 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 7595 ins_encode %{ 7596 int vector_len = 2; 7597 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7598 %} 7599 ins_pipe( pipe_slow ); 7600 %} 7601 7602 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7603 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7604 match(Set dst (MulVI src (LoadVector mem))); 7605 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7606 ins_encode %{ 7607 int vector_len = 2; 7608 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7609 %} 7610 ins_pipe( pipe_slow ); 7611 %} 7612 7613 // Floats vector mul 7614 instruct vmul2F(vecD dst, vecD src) %{ 7615 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7616 match(Set dst (MulVF dst src)); 7617 format %{ "mulps $dst,$src\t! mul packed2F" %} 7618 ins_encode %{ 7619 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7620 %} 7621 ins_pipe( pipe_slow ); 7622 %} 7623 7624 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7625 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7626 match(Set dst (MulVF src1 src2)); 7627 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7628 ins_encode %{ 7629 int vector_len = 0; 7630 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7631 %} 7632 ins_pipe( pipe_slow ); 7633 %} 7634 7635 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7636 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7637 match(Set dst (MulVF src (LoadVector mem))); 7638 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7639 ins_encode %{ 7640 int vector_len = 0; 7641 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7642 %} 7643 ins_pipe( pipe_slow ); 7644 %} 7645 7646 instruct vmul4F(vecX dst, vecX src) %{ 7647 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7648 match(Set dst (MulVF dst src)); 7649 format %{ "mulps $dst,$src\t! mul packed4F" %} 7650 ins_encode %{ 7651 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7652 %} 7653 ins_pipe( pipe_slow ); 7654 %} 7655 7656 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7657 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7658 match(Set dst (MulVF src1 src2)); 7659 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7660 ins_encode %{ 7661 int vector_len = 0; 7662 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7663 %} 7664 ins_pipe( pipe_slow ); 7665 %} 7666 7667 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 7668 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7669 match(Set dst (MulVF src (LoadVector mem))); 7670 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 7671 ins_encode %{ 7672 int vector_len = 0; 7673 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7674 %} 7675 ins_pipe( pipe_slow ); 7676 %} 7677 7678 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 7679 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7680 match(Set dst (MulVF src1 src2)); 7681 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 7682 ins_encode %{ 7683 int vector_len = 1; 7684 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7685 %} 7686 ins_pipe( pipe_slow ); 7687 %} 7688 7689 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 7690 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7691 match(Set dst (MulVF src (LoadVector mem))); 7692 format %{ "vmulps $dst,$src,$mem\t! 
mul packed8F" %} 7693 ins_encode %{ 7694 int vector_len = 1; 7695 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7696 %} 7697 ins_pipe( pipe_slow ); 7698 %} 7699 7700 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7701 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7702 match(Set dst (MulVF src1 src2)); 7703 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 7704 ins_encode %{ 7705 int vector_len = 2; 7706 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7707 %} 7708 ins_pipe( pipe_slow ); 7709 %} 7710 7711 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 7712 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7713 match(Set dst (MulVF src (LoadVector mem))); 7714 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 7715 ins_encode %{ 7716 int vector_len = 2; 7717 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7718 %} 7719 ins_pipe( pipe_slow ); 7720 %} 7721 7722 // Doubles vector mul 7723 instruct vmul2D(vecX dst, vecX src) %{ 7724 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7725 match(Set dst (MulVD dst src)); 7726 format %{ "mulpd $dst,$src\t! mul packed2D" %} 7727 ins_encode %{ 7728 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 7729 %} 7730 ins_pipe( pipe_slow ); 7731 %} 7732 7733 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 7734 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7735 match(Set dst (MulVD src1 src2)); 7736 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 7737 ins_encode %{ 7738 int vector_len = 0; 7739 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7740 %} 7741 ins_pipe( pipe_slow ); 7742 %} 7743 7744 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 7745 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7746 match(Set dst (MulVD src (LoadVector mem))); 7747 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 7748 ins_encode %{ 7749 int vector_len = 0; 7750 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7751 %} 7752 ins_pipe( pipe_slow ); 7753 %} 7754 7755 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 7756 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7757 match(Set dst (MulVD src1 src2)); 7758 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 7759 ins_encode %{ 7760 int vector_len = 1; 7761 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7762 %} 7763 ins_pipe( pipe_slow ); 7764 %} 7765 7766 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 7767 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7768 match(Set dst (MulVD src (LoadVector mem))); 7769 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 7770 ins_encode %{ 7771 int vector_len = 1; 7772 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7773 %} 7774 ins_pipe( pipe_slow ); 7775 %} 7776 7777 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7778 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7779 match(Set dst (MulVD src1 src2)); 7780 format %{ "vmulpd $dst k0,$src1,$src2\t! 
mul packed8D" %} 7781 ins_encode %{ 7782 int vector_len = 2; 7783 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7784 %} 7785 ins_pipe( pipe_slow ); 7786 %} 7787 7788 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 7789 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7790 match(Set dst (MulVD src (LoadVector mem))); 7791 format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %} 7792 ins_encode %{ 7793 int vector_len = 2; 7794 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7795 %} 7796 ins_pipe( pipe_slow ); 7797 %} 7798 7799 instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 7800 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7801 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 7802 effect(TEMP dst, USE src1, USE src2); 7803 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 7804 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 7805 %} 7806 ins_encode %{ 7807 int vector_len = 1; 7808 int cond = (Assembler::Condition)($copnd$$cmpcode); 7809 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 7810 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 7811 %} 7812 ins_pipe( pipe_slow ); 7813 %} 7814 7815 instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 7816 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7817 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 7818 effect(TEMP dst, USE src1, USE src2); 7819 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 7820 "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 7821 %} 7822 ins_encode %{ 7823 int vector_len = 1; 7824 int cond = (Assembler::Condition)($copnd$$cmpcode); 7825 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 7826 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 7827 %} 7828 ins_pipe( pipe_slow ); 7829 %} 7830 7831 // --------------------------------- DIV -------------------------------------- 7832 7833 // Floats vector div 7834 instruct vdiv2F(vecD dst, vecD src) %{ 7835 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7836 match(Set dst (DivVF dst src)); 7837 format %{ "divps $dst,$src\t! div packed2F" %} 7838 ins_encode %{ 7839 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7840 %} 7841 ins_pipe( pipe_slow ); 7842 %} 7843 7844 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 7845 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7846 match(Set dst (DivVF src1 src2)); 7847 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 7848 ins_encode %{ 7849 int vector_len = 0; 7850 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7851 %} 7852 ins_pipe( pipe_slow ); 7853 %} 7854 7855 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 7856 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7857 match(Set dst (DivVF src (LoadVector mem))); 7858 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 7859 ins_encode %{ 7860 int vector_len = 0; 7861 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7862 %} 7863 ins_pipe( pipe_slow ); 7864 %} 7865 7866 instruct vdiv4F(vecX dst, vecX src) %{ 7867 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7868 match(Set dst (DivVF dst src)); 7869 format %{ "divps $dst,$src\t! 
div packed4F" %} 7870 ins_encode %{ 7871 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7872 %} 7873 ins_pipe( pipe_slow ); 7874 %} 7875 7876 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 7877 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7878 match(Set dst (DivVF src1 src2)); 7879 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 7880 ins_encode %{ 7881 int vector_len = 0; 7882 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7883 %} 7884 ins_pipe( pipe_slow ); 7885 %} 7886 7887 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 7888 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7889 match(Set dst (DivVF src (LoadVector mem))); 7890 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 7891 ins_encode %{ 7892 int vector_len = 0; 7893 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7894 %} 7895 ins_pipe( pipe_slow ); 7896 %} 7897 7898 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 7899 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7900 match(Set dst (DivVF src1 src2)); 7901 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 7902 ins_encode %{ 7903 int vector_len = 1; 7904 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7905 %} 7906 ins_pipe( pipe_slow ); 7907 %} 7908 7909 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 7910 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7911 match(Set dst (DivVF src (LoadVector mem))); 7912 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 7913 ins_encode %{ 7914 int vector_len = 1; 7915 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7916 %} 7917 ins_pipe( pipe_slow ); 7918 %} 7919 7920 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7921 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 7922 match(Set dst (DivVF src1 src2)); 7923 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 7924 ins_encode %{ 7925 int vector_len = 2; 7926 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7927 %} 7928 ins_pipe( pipe_slow ); 7929 %} 7930 7931 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 7932 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 7933 match(Set dst (DivVF src (LoadVector mem))); 7934 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 7935 ins_encode %{ 7936 int vector_len = 2; 7937 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7938 %} 7939 ins_pipe( pipe_slow ); 7940 %} 7941 7942 // Doubles vector div 7943 instruct vdiv2D(vecX dst, vecX src) %{ 7944 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7945 match(Set dst (DivVD dst src)); 7946 format %{ "divpd $dst,$src\t! div packed2D" %} 7947 ins_encode %{ 7948 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 7949 %} 7950 ins_pipe( pipe_slow ); 7951 %} 7952 7953 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 7954 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7955 match(Set dst (DivVD src1 src2)); 7956 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 7957 ins_encode %{ 7958 int vector_len = 0; 7959 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7960 %} 7961 ins_pipe( pipe_slow ); 7962 %} 7963 7964 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 7965 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7966 match(Set dst (DivVD src (LoadVector mem))); 7967 format %{ "vdivpd $dst,$src,$mem\t! 
div packed2D" %} 7968 ins_encode %{ 7969 int vector_len = 0; 7970 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7971 %} 7972 ins_pipe( pipe_slow ); 7973 %} 7974 7975 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 7976 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7977 match(Set dst (DivVD src1 src2)); 7978 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 7979 ins_encode %{ 7980 int vector_len = 1; 7981 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7982 %} 7983 ins_pipe( pipe_slow ); 7984 %} 7985 7986 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 7987 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7988 match(Set dst (DivVD src (LoadVector mem))); 7989 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 7990 ins_encode %{ 7991 int vector_len = 1; 7992 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7993 %} 7994 ins_pipe( pipe_slow ); 7995 %} 7996 7997 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7998 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7999 match(Set dst (DivVD src1 src2)); 8000 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8001 ins_encode %{ 8002 int vector_len = 2; 8003 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8004 %} 8005 ins_pipe( pipe_slow ); 8006 %} 8007 8008 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8009 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8010 match(Set dst (DivVD src (LoadVector mem))); 8011 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8012 ins_encode %{ 8013 int vector_len = 2; 8014 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8015 %} 8016 ins_pipe( pipe_slow ); 8017 %} 8018 8019 // ------------------------------ Shift --------------------------------------- 8020 8021 // Left and right shift count vectors are the same on x86 8022 // (only lowest bits of xmm reg are used for count). 8023 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8024 match(Set dst (LShiftCntV cnt)); 8025 match(Set dst (RShiftCntV cnt)); 8026 format %{ "movd $dst,$cnt\t! load shift count" %} 8027 ins_encode %{ 8028 __ movdl($dst$$XMMRegister, $cnt$$Register); 8029 %} 8030 ins_pipe( pipe_slow ); 8031 %} 8032 8033 // --------------------------------- Sqrt -------------------------------------- 8034 8035 // Floating point vector sqrt 8036 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8037 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8038 match(Set dst (SqrtVD src)); 8039 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 8040 ins_encode %{ 8041 int vector_len = 0; 8042 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8043 %} 8044 ins_pipe( pipe_slow ); 8045 %} 8046 8047 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8048 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8049 match(Set dst (SqrtVD (LoadVector mem))); 8050 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8051 ins_encode %{ 8052 int vector_len = 0; 8053 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8054 %} 8055 ins_pipe( pipe_slow ); 8056 %} 8057 8058 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8059 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8060 match(Set dst (SqrtVD src)); 8061 format %{ "vsqrtpd $dst,$src\t! 
sqrt packed4D" %} 8062 ins_encode %{ 8063 int vector_len = 1; 8064 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8065 %} 8066 ins_pipe( pipe_slow ); 8067 %} 8068 8069 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8070 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8071 match(Set dst (SqrtVD (LoadVector mem))); 8072 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8073 ins_encode %{ 8074 int vector_len = 1; 8075 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8076 %} 8077 ins_pipe( pipe_slow ); 8078 %} 8079 8080 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8081 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8082 match(Set dst (SqrtVD src)); 8083 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 8084 ins_encode %{ 8085 int vector_len = 2; 8086 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8087 %} 8088 ins_pipe( pipe_slow ); 8089 %} 8090 8091 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8092 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8093 match(Set dst (SqrtVD (LoadVector mem))); 8094 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 8095 ins_encode %{ 8096 int vector_len = 2; 8097 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8098 %} 8099 ins_pipe( pipe_slow ); 8100 %} 8101 8102 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 8103 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8104 match(Set dst (SqrtVF src)); 8105 format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %} 8106 ins_encode %{ 8107 int vector_len = 0; 8108 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8109 %} 8110 ins_pipe( pipe_slow ); 8111 %} 8112 8113 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 8114 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8115 match(Set dst (SqrtVF (LoadVector mem))); 8116 format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} 8117 ins_encode %{ 8118 int vector_len = 0; 8119 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8120 %} 8121 ins_pipe( pipe_slow ); 8122 %} 8123 8124 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 8125 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8126 match(Set dst (SqrtVF src)); 8127 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 8128 ins_encode %{ 8129 int vector_len = 0; 8130 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8131 %} 8132 ins_pipe( pipe_slow ); 8133 %} 8134 8135 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 8136 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8137 match(Set dst (SqrtVF (LoadVector mem))); 8138 format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} 8139 ins_encode %{ 8140 int vector_len = 0; 8141 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8142 %} 8143 ins_pipe( pipe_slow ); 8144 %} 8145 8146 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 8147 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8148 match(Set dst (SqrtVF src)); 8149 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 8150 ins_encode %{ 8151 int vector_len = 1; 8152 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8153 %} 8154 ins_pipe( pipe_slow ); 8155 %} 8156 8157 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 8158 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8159 match(Set dst (SqrtVF (LoadVector mem))); 8160 format %{ "vsqrtps $dst,$mem\t! 
sqrt packed8F" %} 8161 ins_encode %{ 8162 int vector_len = 1; 8163 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8164 %} 8165 ins_pipe( pipe_slow ); 8166 %} 8167 8168 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 8169 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8170 match(Set dst (SqrtVF src)); 8171 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8172 ins_encode %{ 8173 int vector_len = 2; 8174 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8175 %} 8176 ins_pipe( pipe_slow ); 8177 %} 8178 8179 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8180 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8181 match(Set dst (SqrtVF (LoadVector mem))); 8182 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8183 ins_encode %{ 8184 int vector_len = 2; 8185 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8186 %} 8187 ins_pipe( pipe_slow ); 8188 %} 8189 8190 // ------------------------------ LeftShift ----------------------------------- 8191 8192 // Shorts/Chars vector left shift 8193 instruct vsll2S(vecS dst, vecS shift) %{ 8194 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8195 match(Set dst (LShiftVS dst shift)); 8196 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8197 ins_encode %{ 8198 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8199 %} 8200 ins_pipe( pipe_slow ); 8201 %} 8202 8203 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 8204 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8205 match(Set dst (LShiftVS dst shift)); 8206 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 8207 ins_encode %{ 8208 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8209 %} 8210 ins_pipe( pipe_slow ); 8211 %} 8212 8213 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 8214 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8215 match(Set dst (LShiftVS src shift)); 8216 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8217 ins_encode %{ 8218 int vector_len = 0; 8219 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8220 %} 8221 ins_pipe( pipe_slow ); 8222 %} 8223 8224 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8225 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8226 match(Set dst (LShiftVS src shift)); 8227 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 8228 ins_encode %{ 8229 int vector_len = 0; 8230 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8231 %} 8232 ins_pipe( pipe_slow ); 8233 %} 8234 8235 instruct vsll4S(vecD dst, vecS shift) %{ 8236 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8237 match(Set dst (LShiftVS dst shift)); 8238 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8239 ins_encode %{ 8240 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8241 %} 8242 ins_pipe( pipe_slow ); 8243 %} 8244 8245 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 8246 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8247 match(Set dst (LShiftVS dst shift)); 8248 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 8249 ins_encode %{ 8250 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8251 %} 8252 ins_pipe( pipe_slow ); 8253 %} 8254 8255 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 8256 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8257 match(Set dst (LShiftVS src shift)); 8258 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 8259 ins_encode %{ 8260 int vector_len = 0; 8261 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8262 %} 8263 ins_pipe( pipe_slow ); 8264 %} 8265 8266 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8267 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8268 match(Set dst (LShiftVS src shift)); 8269 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 8270 ins_encode %{ 8271 int vector_len = 0; 8272 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8273 %} 8274 ins_pipe( pipe_slow ); 8275 %} 8276 8277 instruct vsll8S(vecX dst, vecS shift) %{ 8278 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8279 match(Set dst (LShiftVS dst shift)); 8280 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8281 ins_encode %{ 8282 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 8283 %} 8284 ins_pipe( pipe_slow ); 8285 %} 8286 8287 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 8288 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8289 match(Set dst (LShiftVS dst shift)); 8290 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 8291 ins_encode %{ 8292 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8293 %} 8294 ins_pipe( pipe_slow ); 8295 %} 8296 8297 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 8298 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8299 match(Set dst (LShiftVS src shift)); 8300 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8301 ins_encode %{ 8302 int vector_len = 0; 8303 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8304 %} 8305 ins_pipe( pipe_slow ); 8306 %} 8307 8308 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8309 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8310 match(Set dst (LShiftVS src shift)); 8311 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8312 ins_encode %{ 8313 int vector_len = 0; 8314 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8315 %} 8316 ins_pipe( pipe_slow ); 8317 %} 8318 8319 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 8320 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8321 match(Set dst (LShiftVS src shift)); 8322 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8323 ins_encode %{ 8324 int vector_len = 1; 8325 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8326 %} 8327 ins_pipe( pipe_slow ); 8328 %} 8329 8330 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8331 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8332 match(Set dst (LShiftVS src shift)); 8333 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8334 ins_encode %{ 8335 int vector_len = 1; 8336 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8337 %} 8338 ins_pipe( pipe_slow ); 8339 %} 8340 8341 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8342 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8343 match(Set dst (LShiftVS src shift)); 8344 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed32S" %} 8345 ins_encode %{ 8346 int vector_len = 2; 8347 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8348 %} 8349 ins_pipe( pipe_slow ); 8350 %} 8351 8352 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8353 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8354 match(Set dst (LShiftVS src shift)); 8355 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 8356 ins_encode %{ 8357 int vector_len = 2; 8358 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8359 %} 8360 ins_pipe( pipe_slow ); 8361 %} 8362 8363 // Integers vector left shift 8364 instruct vsll2I(vecD dst, vecS shift) %{ 8365 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8366 match(Set dst (LShiftVI dst shift)); 8367 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8368 ins_encode %{ 8369 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8370 %} 8371 ins_pipe( pipe_slow ); 8372 %} 8373 8374 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 8375 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8376 match(Set dst (LShiftVI dst shift)); 8377 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 8378 ins_encode %{ 8379 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8380 %} 8381 ins_pipe( pipe_slow ); 8382 %} 8383 8384 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 8385 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8386 match(Set dst (LShiftVI src shift)); 8387 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8388 ins_encode %{ 8389 int vector_len = 0; 8390 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8391 %} 8392 ins_pipe( pipe_slow ); 8393 %} 8394 8395 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8396 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8397 match(Set dst (LShiftVI src shift)); 8398 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 8399 ins_encode %{ 8400 int vector_len = 0; 8401 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8402 %} 8403 ins_pipe( pipe_slow ); 8404 %} 8405 8406 instruct vsll4I(vecX dst, vecS shift) %{ 8407 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8408 match(Set dst (LShiftVI dst shift)); 8409 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8410 ins_encode %{ 8411 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 8412 %} 8413 ins_pipe( pipe_slow ); 8414 %} 8415 8416 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 8417 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8418 match(Set dst (LShiftVI dst shift)); 8419 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 8420 ins_encode %{ 8421 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 8422 %} 8423 ins_pipe( pipe_slow ); 8424 %} 8425 8426 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 8427 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8428 match(Set dst (LShiftVI src shift)); 8429 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 8430 ins_encode %{ 8431 int vector_len = 0; 8432 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8433 %} 8434 ins_pipe( pipe_slow ); 8435 %} 8436 8437 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8438 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8439 match(Set dst (LShiftVI src shift)); 8440 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 8441 ins_encode %{ 8442 int vector_len = 0; 8443 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8444 %} 8445 ins_pipe( pipe_slow ); 8446 %} 8447 8448 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 8449 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8450 match(Set dst (LShiftVI src shift)); 8451 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 8452 ins_encode %{ 8453 int vector_len = 1; 8454 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8455 %} 8456 ins_pipe( pipe_slow ); 8457 %} 8458 8459 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8460 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8461 match(Set dst (LShiftVI src shift)); 8462 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 8463 ins_encode %{ 8464 int vector_len = 1; 8465 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8466 %} 8467 ins_pipe( pipe_slow ); 8468 %} 8469 8470 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8471 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8472 match(Set dst (LShiftVI src shift)); 8473 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 8474 ins_encode %{ 8475 int vector_len = 2; 8476 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8477 %} 8478 ins_pipe( pipe_slow ); 8479 %} 8480 8481 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8482 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8483 match(Set dst (LShiftVI src shift)); 8484 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 8485 ins_encode %{ 8486 int vector_len = 2; 8487 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8488 %} 8489 ins_pipe( pipe_slow ); 8490 %} 8491 8492 // Longs vector left shift 8493 instruct vsll2L(vecX dst, vecS shift) %{ 8494 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8495 match(Set dst (LShiftVL dst shift)); 8496 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 8497 ins_encode %{ 8498 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 8499 %} 8500 ins_pipe( pipe_slow ); 8501 %} 8502 8503 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 8504 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8505 match(Set dst (LShiftVL dst shift)); 8506 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 8507 ins_encode %{ 8508 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 8509 %} 8510 ins_pipe( pipe_slow ); 8511 %} 8512 8513 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 8514 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8515 match(Set dst (LShiftVL src shift)); 8516 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 8517 ins_encode %{ 8518 int vector_len = 0; 8519 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8520 %} 8521 ins_pipe( pipe_slow ); 8522 %} 8523 8524 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8525 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8526 match(Set dst (LShiftVL src shift)); 8527 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed2L" %} 8528 ins_encode %{ 8529 int vector_len = 0; 8530 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8531 %} 8532 ins_pipe( pipe_slow ); 8533 %} 8534 8535 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 8536 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8537 match(Set dst (LShiftVL src shift)); 8538 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 8539 ins_encode %{ 8540 int vector_len = 1; 8541 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8542 %} 8543 ins_pipe( pipe_slow ); 8544 %} 8545 8546 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8547 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8548 match(Set dst (LShiftVL src shift)); 8549 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 8550 ins_encode %{ 8551 int vector_len = 1; 8552 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8553 %} 8554 ins_pipe( pipe_slow ); 8555 %} 8556 8557 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 8558 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8559 match(Set dst (LShiftVL src shift)); 8560 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 8561 ins_encode %{ 8562 int vector_len = 2; 8563 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8564 %} 8565 ins_pipe( pipe_slow ); 8566 %} 8567 8568 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8569 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8570 match(Set dst (LShiftVL src shift)); 8571 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 8572 ins_encode %{ 8573 int vector_len = 2; 8574 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8575 %} 8576 ins_pipe( pipe_slow ); 8577 %} 8578 8579 // ----------------------- LogicalRightShift ----------------------------------- 8580 8581 // Shorts vector logical right shift produces incorrect Java result 8582 // for negative data because java code convert short value into int with 8583 // sign extension before a shift. But char vectors are fine since chars are 8584 // unsigned values. 8585 8586 instruct vsrl2S(vecS dst, vecS shift) %{ 8587 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8588 match(Set dst (URShiftVS dst shift)); 8589 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 8590 ins_encode %{ 8591 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8592 %} 8593 ins_pipe( pipe_slow ); 8594 %} 8595 8596 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 8597 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8598 match(Set dst (URShiftVS dst shift)); 8599 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 8600 ins_encode %{ 8601 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8602 %} 8603 ins_pipe( pipe_slow ); 8604 %} 8605 8606 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 8607 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8608 match(Set dst (URShiftVS src shift)); 8609 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 8610 ins_encode %{ 8611 int vector_len = 0; 8612 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8613 %} 8614 ins_pipe( pipe_slow ); 8615 %} 8616 8617 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 8618 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8619 match(Set dst (URShiftVS src shift)); 8620 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed2S" %} 8621 ins_encode %{ 8622 int vector_len = 0; 8623 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8624 %} 8625 ins_pipe( pipe_slow ); 8626 %} 8627 8628 instruct vsrl4S(vecD dst, vecS shift) %{ 8629 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8630 match(Set dst (URShiftVS dst shift)); 8631 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 8632 ins_encode %{ 8633 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8634 %} 8635 ins_pipe( pipe_slow ); 8636 %} 8637 8638 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 8639 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8640 match(Set dst (URShiftVS dst shift)); 8641 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 8642 ins_encode %{ 8643 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8644 %} 8645 ins_pipe( pipe_slow ); 8646 %} 8647 8648 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 8649 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8650 match(Set dst (URShiftVS src shift)); 8651 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 8652 ins_encode %{ 8653 int vector_len = 0; 8654 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8655 %} 8656 ins_pipe( pipe_slow ); 8657 %} 8658 8659 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8660 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8661 match(Set dst (URShiftVS src shift)); 8662 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 8663 ins_encode %{ 8664 int vector_len = 0; 8665 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8666 %} 8667 ins_pipe( pipe_slow ); 8668 %} 8669 8670 instruct vsrl8S(vecX dst, vecS shift) %{ 8671 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8672 match(Set dst (URShiftVS dst shift)); 8673 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 8674 ins_encode %{ 8675 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 8676 %} 8677 ins_pipe( pipe_slow ); 8678 %} 8679 8680 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 8681 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 8682 match(Set dst (URShiftVS dst shift)); 8683 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 8684 ins_encode %{ 8685 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 8686 %} 8687 ins_pipe( pipe_slow ); 8688 %} 8689 8690 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 8691 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8692 match(Set dst (URShiftVS src shift)); 8693 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 8694 ins_encode %{ 8695 int vector_len = 0; 8696 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8697 %} 8698 ins_pipe( pipe_slow ); 8699 %} 8700 8701 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8702 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8703 match(Set dst (URShiftVS src shift)); 8704 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 8705 ins_encode %{ 8706 int vector_len = 0; 8707 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8708 %} 8709 ins_pipe( pipe_slow ); 8710 %} 8711 8712 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 8713 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8714 match(Set dst (URShiftVS src shift)); 8715 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 8716 ins_encode %{ 8717 int vector_len = 1; 8718 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8719 %} 8720 ins_pipe( pipe_slow ); 8721 %} 8722 8723 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8724 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8725 match(Set dst (URShiftVS src shift)); 8726 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 8727 ins_encode %{ 8728 int vector_len = 1; 8729 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8730 %} 8731 ins_pipe( pipe_slow ); 8732 %} 8733 8734 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 8735 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8736 match(Set dst (URShiftVS src shift)); 8737 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8738 ins_encode %{ 8739 int vector_len = 2; 8740 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8741 %} 8742 ins_pipe( pipe_slow ); 8743 %} 8744 8745 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8746 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8747 match(Set dst (URShiftVS src shift)); 8748 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 8749 ins_encode %{ 8750 int vector_len = 2; 8751 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8752 %} 8753 ins_pipe( pipe_slow ); 8754 %} 8755 8756 // Integers vector logical right shift 8757 instruct vsrl2I(vecD dst, vecS shift) %{ 8758 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8759 match(Set dst (URShiftVI dst shift)); 8760 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8761 ins_encode %{ 8762 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8763 %} 8764 ins_pipe( pipe_slow ); 8765 %} 8766 8767 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 8768 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8769 match(Set dst (URShiftVI dst shift)); 8770 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 8771 ins_encode %{ 8772 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8773 %} 8774 ins_pipe( pipe_slow ); 8775 %} 8776 8777 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 8778 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8779 match(Set dst (URShiftVI src shift)); 8780 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8781 ins_encode %{ 8782 int vector_len = 0; 8783 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8784 %} 8785 ins_pipe( pipe_slow ); 8786 %} 8787 8788 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 8789 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8790 match(Set dst (URShiftVI src shift)); 8791 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 8792 ins_encode %{ 8793 int vector_len = 0; 8794 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8795 %} 8796 ins_pipe( pipe_slow ); 8797 %} 8798 8799 instruct vsrl4I(vecX dst, vecS shift) %{ 8800 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8801 match(Set dst (URShiftVI dst shift)); 8802 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 8803 ins_encode %{ 8804 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 8805 %} 8806 ins_pipe( pipe_slow ); 8807 %} 8808 8809 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 8810 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 8811 match(Set dst (URShiftVI dst shift)); 8812 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 8813 ins_encode %{ 8814 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 8815 %} 8816 ins_pipe( pipe_slow ); 8817 %} 8818 8819 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 8820 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8821 match(Set dst (URShiftVI src shift)); 8822 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 8823 ins_encode %{ 8824 int vector_len = 0; 8825 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8826 %} 8827 ins_pipe( pipe_slow ); 8828 %} 8829 8830 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 8831 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8832 match(Set dst (URShiftVI src shift)); 8833 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 8834 ins_encode %{ 8835 int vector_len = 0; 8836 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8837 %} 8838 ins_pipe( pipe_slow ); 8839 %} 8840 8841 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 8842 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8843 match(Set dst (URShiftVI src shift)); 8844 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8845 ins_encode %{ 8846 int vector_len = 1; 8847 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8848 %} 8849 ins_pipe( pipe_slow ); 8850 %} 8851 8852 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 8853 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8854 match(Set dst (URShiftVI src shift)); 8855 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 8856 ins_encode %{ 8857 int vector_len = 1; 8858 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8859 %} 8860 ins_pipe( pipe_slow ); 8861 %} 8862 8863 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 8864 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8865 match(Set dst (URShiftVI src shift)); 8866 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8867 ins_encode %{ 8868 int vector_len = 2; 8869 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8870 %} 8871 ins_pipe( pipe_slow ); 8872 %} 8873 8874 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 8875 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8876 match(Set dst (URShiftVI src shift)); 8877 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 8878 ins_encode %{ 8879 int vector_len = 2; 8880 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8881 %} 8882 ins_pipe( pipe_slow ); 8883 %} 8884 8885 // Longs vector logical right shift 8886 instruct vsrl2L(vecX dst, vecS shift) %{ 8887 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8888 match(Set dst (URShiftVL dst shift)); 8889 format %{ "psrlq $dst,$shift\t! 
// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift instructions for longs.

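// (psraw/psrad stop at 32-bit elements; x86 provides no packed 64-bit
// arithmetic right shift below AVX-512, and the EVEX-only vpsraq is not used
// by these rules, so RShiftVL is not matched here.)
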
// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

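// For illustration only (hypothetical Java): element-wise bitwise loops such as
//
//   for (int i = 0; i < a.length; i++) {
//     a[i] &= b[i];   // AndV -> pand/vpand
//   }
//
// vectorize to the AndV rules above; the OrV and XorV sections that follow are
// the same pattern using por/vpor and pxor/vpxor.
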
// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

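// For illustration only (hypothetical Java): the FmaVD/FmaVF nodes matched in
// this section come from Math.fma in vectorizable loops (requires UseFMA):
//
//   for (int i = 0; i < a.length; i++) {
//     c[i] = Math.fma(a[i], b[i], c[i]);   // c = a * b + c with one rounding
//   }
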
// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

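// pmaddwd/vpmaddwd multiply adjacent pairs of signed 16-bit elements and sum
// each pair into one signed 32-bit lane:
//
//   dst[i] = src1[2*i] * src2[2*i] + src1[2*i+1] * src2[2*i+1]
//
// which is why the MulAddVS2VI rules below turn packed4S into packed2I,
// packed8S into packed4I, and so on.
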
instruct smuladd4S2I_reg(vecD dst, vecD src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed4Sto2I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct smuladd8S4I_reg(vecX dst, vecX src1) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packed8Sto4I" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

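// evpdpwssd (AVX-512 VNNI) fuses the vpmaddwd multiply-add with the
// accumulation into $dst (dst += pmaddwd(src1, src2)) in a single instruction,
// which is why the rules below match the (AddVI (MulAddVS2VI src1 src2) dst)
// shape directly.
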
instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed4Sto2I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed8Sto4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed16Sto8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_vnni() && UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packed32Sto16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

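// For illustration only (hypothetical Java): PopCountVI arises from
// vectorizable loops over Integer.bitCount, e.g.
//
//   for (int i = 0; i < a.length; i++) {
//     b[i] = Integer.bitCount(a[i]);   // -> vpopcntd (needs AVX512_VPOPCNTDQ)
//   }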