//
// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, i.e. 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
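//
// Reading the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// word (a) of XMM0 is save-on-call under both conventions, spills as a
// float (Op_RegF), has hardware encoding 0 and maps to the VM register
// for xmm0. Each ->next(n) entry is the n-th 32-bit slot of the same
// physical register, so words (a)-(d) form the 128-bit XMM view,
// (a)-(h) the 256-bit YMM view, and (a)-(p) the full 512-bit ZMM view
// used by the vector register classes further down.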
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64
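
// RFLAGS has no ideal register type (0) and no VMReg mapping
// (VMRegImpl::Bad()); its encoding (16 in 64-bit builds, 8 in 32-bit
// builds) matches the number of general-purpose registers in each mode.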

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
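
// A reg_class_dynamic selects one of its two member classes at runtime:
// the first (evex) class when the %{ ... %} predicate holds, otherwise
// the legacy class limited to XMM0-XMM15. The _vl variants additionally
// require AVX-512VL, the extension that lets EVEX-encoded instructions
// operate on vector lengths below 512 bits.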

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
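
// The vector register classes that follow encode the vector width in
// their name: vectors_reg covers 32-bit vectors (word (a) only),
// vectord_reg 64-bit vectors ((a)-(b)), vectorx_reg 128-bit XMM
// ((a)-(d)), vectory_reg 256-bit YMM ((a)-(h)) and vectorz_reg 512-bit
// ZMM ((a)-(p)).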

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
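
// The _vlbwdq variants gate on VM_Version::supports_avx512vlbwdq(),
// i.e. on the AVX-512 VL, BW and DQ extensions being present together,
// the combination needed for byte- and word-element vector operations
// at sub-512-bit lengths.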

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1073 #endif 1074 ); 1075 1076 // Class for restricted 512bit vector registers 1077 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1078 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1079 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1080 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1081 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1082 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1083 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1084 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1085 #ifdef _LP64 1086 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1087 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1088 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1089 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1090 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1091 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1092 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1093 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1094 #endif 1095 ); 1096 1097 reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1098 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1099 1100 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1101 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); 1102 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); 1103 1104 reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); 1105 reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); 1106 reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1107 1108 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1109 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1110 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1111 1112 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1113 reg_class ymm3_reg(XMM3, XMM3b, 
XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1114 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1115 1116 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1117 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1118 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1119 1120 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1121 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1122 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1123 1124 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1125 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1126 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1127 1128 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1129 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1130 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1131 1132 #ifdef _LP64 1133 1134 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1135 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1136 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1137 1138 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1139 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1140 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1141 1142 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1143 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1144 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1145 1146 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1147 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1148 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1149 1150 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1151 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1152 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1153 1154 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1155 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1156 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1157 1158 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1159 reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1160 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1161 1162 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1163 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1164 reg_class zmm15_reg(XMM15, XMM15b, 
XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1165 1166 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1167 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1168 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1169 1170 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1171 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1172 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1173 1174 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1175 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1176 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1177 1178 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1179 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1180 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1181 1182 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1183 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1184 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1185 1186 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1187 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1188 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1189 1190 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1191 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1192 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1193 1194 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1195 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1196 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1197 1198 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1199 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1200 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1201 1202 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1203 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1204 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); 1205 1206 reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); 1207 reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); 1208 reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); 1209 1210 reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); 1211 reg_class ymm27_reg(XMM27, 
XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5-byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
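// For intuition (a sketch of the arithmetic above, not additional VM code):
// `call next` is a 5-byte instruction whose only lasting effect here is pushing
// its return address, i.e. the address of `next`, which equals the_pc + 5. The
// in-place subptr(Address(rsp, 0), __ offset() - offset) then subtracts exactly
// the number of bytes emitted since `offset` -- the 5-byte call -- leaving
// the_pc itself on the stack, all without clobbering any live register.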
//=============================================================================

// Float masks come from different places depending on platform.
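// As a scalar model (a sketch for intuition only, assuming HotSpot's
// jint_cast / jfloat_cast bit-casts; nothing below references it):
//
//   float abs_model(float f) { return jfloat_cast(jint_cast(f) & 0x7FFFFFFF); }
//   float neg_model(float f) { return jfloat_cast(jint_cast(f) ^ 0x80000000); }
//
// The AbsF/AbsD rules further down AND the sign bit away with the signmask
// constants these accessors return, and the NegF/NegD rules XOR it with the
// signflip constants; the stub constants simply replicate the masks across
// every vector lane.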
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }

//=============================================================================
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_AbsVL:
      if (UseAVX < 3)
        ret_value = false;
      break;
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AbsVB:
    case Op_AbsVS:
    case Op_AbsVI:
    case Op_AddReductionVI:
      if (UseSSE < 3 || !VM_Version::supports_ssse3()) // requires at least SSSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_RShiftVL:
    case Op_AbsVD:
    case Op_NegVD:
      if (UseSSE < 2)
        ret_value = false;
      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
    case Op_URShiftVB:
      if (UseSSE < 4)
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value;  // By default match rules are supported.
}
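// A worked case for the two-level check (a reading of the tables above and
// below, not new policy): Op_AddVB passes match_rule_supported() on any SSE2+
// machine, but match_rule_supported_vector() below still vetoes the vlen == 64
// form unless the CPU reports AVX512BW, so such EVEX machines are limited to
// the narrower byte-vector flavors of the same rule.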
const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AbsVB:
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_AbsVS:
      case Op_AddVS:
      case Op_SubVS:
      case Op_MulVS:
      case Op_LShiftVS:
      case Op_RShiftVS:
      case Op_URShiftVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_MulVB:
      case Op_LShiftVB:
      case Op_RShiftVB:
      case Op_URShiftVB:
        if ((vlen == 32 && UseAVX < 2) ||
            ((vlen == 64) && (VM_Version::supports_avx512bw() == false)))
          ret_value = false;
        break;
      case Op_NegVF:
        if ((vlen == 16) && (VM_Version::supports_avx512dq() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_NegVD:
        if ((vlen == 8) && (VM_Version::supports_avx512dq() == false))
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (UseAVX > 2) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
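// A worked case (assuming UseAVX == 2 and MaxVectorSize == 32, nothing else):
// vector_width_in_bytes(T_INT) is (1 << 2) * 8 = 32 bytes, so below
// max_vector_size(T_INT) is 32 / 4 = 8 lanes and max_vector_size(T_BYTE) is
// 32 lanes, while min_vector_size() yields 2 lanes for T_INT but 4 for T_BYTE,
// because a vector load must cover at least 4 bytes.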
1593 const int Matcher::max_vector_size(const BasicType bt) { 1594 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1595 } 1596 const int Matcher::min_vector_size(const BasicType bt) { 1597 int max_size = max_vector_size(bt); 1598 // Min size which can be loaded into vector is 4 bytes. 1599 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1600 return MIN2(size,max_size); 1601 } 1602 1603 // Vector ideal reg corresponding to specified size in bytes 1604 const uint Matcher::vector_ideal_reg(int size) { 1605 assert(MaxVectorSize >= size, ""); 1606 switch(size) { 1607 case 4: return Op_VecS; 1608 case 8: return Op_VecD; 1609 case 16: return Op_VecX; 1610 case 32: return Op_VecY; 1611 case 64: return Op_VecZ; 1612 } 1613 ShouldNotReachHere(); 1614 return 0; 1615 } 1616 1617 // Only lowest bits of xmm reg are used for vector shift count. 1618 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1619 return Op_VecS; 1620 } 1621 1622 // x86 supports misaligned vectors store/load. 1623 const bool Matcher::misaligned_vectors_ok() { 1624 return true; 1625 } 1626 1627 // x86 AES instructions are compatible with SunJCE expanded 1628 // keys, hence we do not need to pass the original key to stubs 1629 const bool Matcher::pass_original_key_for_aes() { 1630 return false; 1631 } 1632 1633 1634 const bool Matcher::convi2l_type_required = true; 1635 1636 // Check for shift by small constant as well 1637 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1638 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1639 shift->in(2)->get_int() <= 3 && 1640 // Are there other uses besides address expressions? 1641 !matcher->is_visited(shift)) { 1642 address_visited.set(shift->_idx); // Flag as address_visited 1643 mstack.push(shift->in(2), Matcher::Visit); 1644 Node *conv = shift->in(1); 1645 #ifdef _LP64 1646 // Allow Matcher to match the rule which bypass 1647 // ConvI2L operation for an array index on LP64 1648 // if the index value is positive. 1649 if (conv->Opcode() == Op_ConvI2L && 1650 conv->as_Type()->type()->is_long()->_lo >= 0 && 1651 // Are there other uses besides address expressions? 1652 !matcher->is_visited(conv)) { 1653 address_visited.set(conv->_idx); // Flag as address_visited 1654 mstack.push(conv->in(1), Matcher::Pre_Visit); 1655 } else 1656 #endif 1657 mstack.push(conv, Matcher::Pre_Visit); 1658 return true; 1659 } 1660 return false; 1661 } 1662 1663 // Should the Matcher clone shifts on addressing modes, expecting them 1664 // to be subsumed into complex addressing expressions or compute them 1665 // into registers? 1666 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1667 Node *off = m->in(AddPNode::Offset); 1668 if (off->is_Con()) { 1669 address_visited.test_set(m->_idx); // Flag as address_visited 1670 Node *adr = m->in(AddPNode::Address); 1671 1672 // Intel can handle 2 adds in addressing mode 1673 // AtomicAdd is not an addressing expression. 1674 // Cheap to find it by looking for screwy base. 1675 if (adr->is_AddP() && 1676 !adr->in(AddPNode::Base)->is_top() && 1677 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 1678 // Are there other uses besides address expressions? 
1679 !is_visited(adr)) { 1680 address_visited.set(adr->_idx); // Flag as address_visited 1681 Node *shift = adr->in(AddPNode::Offset); 1682 if (!clone_shift(shift, this, mstack, address_visited)) { 1683 mstack.push(shift, Pre_Visit); 1684 } 1685 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1686 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1687 } else { 1688 mstack.push(adr, Pre_Visit); 1689 } 1690 1691 // Clone X+offset as it also folds into most addressing expressions 1692 mstack.push(off, Visit); 1693 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1694 return true; 1695 } else if (clone_shift(off, this, mstack, address_visited)) { 1696 address_visited.test_set(m->_idx); // Flag as address_visited 1697 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1698 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1699 return true; 1700 } 1701 return false; 1702 } 1703 1704 void Compile::reshape_address(AddPNode* addp) { 1705 } 1706 1707 // Helper methods for MachSpillCopyNode::implementation(). 1708 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1709 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1710 // In 64-bit VM size calculation is very complex. Emitting instructions 1711 // into scratch buffer is used to get size in 64-bit VM. 1712 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1713 assert(ireg == Op_VecS || // 32bit vector 1714 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1715 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1716 "no non-adjacent vector moves" ); 1717 if (cbuf) { 1718 MacroAssembler _masm(cbuf); 1719 int offset = __ offset(); 1720 switch (ireg) { 1721 case Op_VecS: // copy whole register 1722 case Op_VecD: 1723 case Op_VecX: 1724 #ifndef _LP64 1725 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1726 #else 1727 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1728 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1729 } else { 1730 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1731 } 1732 #endif 1733 break; 1734 case Op_VecY: 1735 #ifndef _LP64 1736 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1737 #else 1738 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 1739 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1740 } else { 1741 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 1742 } 1743 #endif 1744 break; 1745 case Op_VecZ: 1746 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1747 break; 1748 default: 1749 ShouldNotReachHere(); 1750 } 1751 int size = __ offset() - offset; 1752 #ifdef ASSERT 1753 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Concretely: movdqu xmm1, xmm0 encodes as F3 0F 6F C8 under SSE and as
  // C5 FA 6F C8 with the 2-byte VEX prefix -- 4 bytes either way -- while the
  // EVEX form carries a 4-byte prefix, for 6 bytes total.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM the size calculation is very complex, so instructions are
  // emitted into a scratch buffer and measured to obtain the size.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        } else {
          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
#ifndef _LP64
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        } else {
          __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ?
1 : 4); 1949 } 1950 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1951 return size+offset_size; 1952 } 1953 1954 static inline jint replicate4_imm(int con, int width) { 1955 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1956 assert(width == 1 || width == 2, "only byte or short types here"); 1957 int bit_width = width * 8; 1958 jint val = con; 1959 val &= (1 << bit_width) - 1; // mask off sign bits 1960 while(bit_width < 32) { 1961 val |= (val << bit_width); 1962 bit_width <<= 1; 1963 } 1964 return val; 1965 } 1966 1967 static inline jlong replicate8_imm(int con, int width) { 1968 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 1969 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1970 int bit_width = width * 8; 1971 jlong val = con; 1972 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1973 while(bit_width < 64) { 1974 val |= (val << bit_width); 1975 bit_width <<= 1; 1976 } 1977 return val; 1978 } 1979 1980 #ifndef PRODUCT 1981 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1982 st->print("nop \t# %d bytes pad for loops and calls", _count); 1983 } 1984 #endif 1985 1986 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1987 MacroAssembler _masm(&cbuf); 1988 __ nop(_count); 1989 } 1990 1991 uint MachNopNode::size(PhaseRegAlloc*) const { 1992 return _count; 1993 } 1994 1995 #ifndef PRODUCT 1996 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1997 st->print("# breakpoint"); 1998 } 1999 #endif 2000 2001 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2002 MacroAssembler _masm(&cbuf); 2003 __ int3(); 2004 } 2005 2006 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2007 return MachNode::size(ra_); 2008 } 2009 2010 %} 2011 2012 encode %{ 2013 2014 enc_class call_epilog %{ 2015 if (VerifyStackAtCalls) { 2016 // Check that stack depth is unchanged: find majik cookie on stack 2017 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2018 MacroAssembler _masm(&cbuf); 2019 Label L; 2020 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2021 __ jccb(Assembler::equal, L); 2022 // Die if stack mismatch 2023 __ int3(); 2024 __ bind(L); 2025 } 2026 %} 2027 2028 %} 2029 2030 2031 //----------OPERANDS----------------------------------------------------------- 2032 // Operand definitions must precede instruction definitions for correct parsing 2033 // in the ADLC because operands constitute user defined types which are used in 2034 // instruction definitions. 
// instruction definitions.
2035 2036 operand vecZ() %{ 2037 constraint(ALLOC_IN_RC(vectorz_reg)); 2038 match(VecZ); 2039 2040 format %{ %} 2041 interface(REG_INTER); 2042 %} 2043 2044 operand legVecZ() %{ 2045 constraint(ALLOC_IN_RC(vectorz_reg_vl)); 2046 match(VecZ); 2047 2048 format %{ %} 2049 interface(REG_INTER); 2050 %} 2051 2052 // Comparison Code for FP conditional move 2053 operand cmpOp_vcmppd() %{ 2054 match(Bool); 2055 2056 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2057 n->as_Bool()->_test._test != BoolTest::no_overflow); 2058 format %{ "" %} 2059 interface(COND_INTER) %{ 2060 equal (0x0, "eq"); 2061 less (0x1, "lt"); 2062 less_equal (0x2, "le"); 2063 not_equal (0xC, "ne"); 2064 greater_equal(0xD, "ge"); 2065 greater (0xE, "gt"); 2066 //TODO cannot compile (adlc breaks) without two next lines with error: 2067 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2068 // equal' for overflow. 2069 overflow (0x20, "o"); // not really supported by the instruction 2070 no_overflow (0x21, "no"); // not really supported by the instruction 2071 %} 2072 %} 2073 2074 2075 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2076 2077 // ============================================================================ 2078 2079 instruct ShouldNotReachHere() %{ 2080 match(Halt); 2081 format %{ "stop\t# ShouldNotReachHere" %} 2082 ins_encode %{ 2083 if (is_reachable()) { 2084 __ stop(_halt_reason); 2085 } 2086 %} 2087 ins_pipe(pipe_slow); 2088 %} 2089 2090 // =================================EVEX special=============================== 2091 2092 instruct setMask(rRegI dst, rRegI src) %{ 2093 predicate(Matcher::has_predicated_vectors()); 2094 match(Set dst (SetVectMaskI src)); 2095 effect(TEMP dst); 2096 format %{ "setvectmask $dst, $src" %} 2097 ins_encode %{ 2098 __ setvectmask($dst$$Register, $src$$Register); 2099 %} 2100 ins_pipe(pipe_slow); 2101 %} 2102 2103 // ============================================================================ 2104 2105 instruct addF_reg(regF dst, regF src) %{ 2106 predicate((UseSSE>=1) && (UseAVX == 0)); 2107 match(Set dst (AddF dst src)); 2108 2109 format %{ "addss $dst, $src" %} 2110 ins_cost(150); 2111 ins_encode %{ 2112 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2113 %} 2114 ins_pipe(pipe_slow); 2115 %} 2116 2117 instruct addF_mem(regF dst, memory src) %{ 2118 predicate((UseSSE>=1) && (UseAVX == 0)); 2119 match(Set dst (AddF dst (LoadF src))); 2120 2121 format %{ "addss $dst, $src" %} 2122 ins_cost(150); 2123 ins_encode %{ 2124 __ addss($dst$$XMMRegister, $src$$Address); 2125 %} 2126 ins_pipe(pipe_slow); 2127 %} 2128 2129 instruct addF_imm(regF dst, immF con) %{ 2130 predicate((UseSSE>=1) && (UseAVX == 0)); 2131 match(Set dst (AddF dst con)); 2132 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2133 ins_cost(150); 2134 ins_encode %{ 2135 __ addss($dst$$XMMRegister, $constantaddress($con)); 2136 %} 2137 ins_pipe(pipe_slow); 2138 %} 2139 2140 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2141 predicate(UseAVX > 0); 2142 match(Set dst (AddF src1 src2)); 2143 2144 format %{ "vaddss $dst, $src1, $src2" %} 2145 ins_cost(150); 2146 ins_encode %{ 2147 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2148 %} 2149 ins_pipe(pipe_slow); 2150 %} 2151 2152 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2153 predicate(UseAVX > 0); 2154 match(Set dst (AddF src1 (LoadF src2))); 2155 2156 format %{ "vaddss $dst, $src1, $src2" %} 
2157 ins_cost(150); 2158 ins_encode %{ 2159 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2160 %} 2161 ins_pipe(pipe_slow); 2162 %} 2163 2164 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2165 predicate(UseAVX > 0); 2166 match(Set dst (AddF src con)); 2167 2168 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2169 ins_cost(150); 2170 ins_encode %{ 2171 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2172 %} 2173 ins_pipe(pipe_slow); 2174 %} 2175 2176 instruct addD_reg(regD dst, regD src) %{ 2177 predicate((UseSSE>=2) && (UseAVX == 0)); 2178 match(Set dst (AddD dst src)); 2179 2180 format %{ "addsd $dst, $src" %} 2181 ins_cost(150); 2182 ins_encode %{ 2183 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2184 %} 2185 ins_pipe(pipe_slow); 2186 %} 2187 2188 instruct addD_mem(regD dst, memory src) %{ 2189 predicate((UseSSE>=2) && (UseAVX == 0)); 2190 match(Set dst (AddD dst (LoadD src))); 2191 2192 format %{ "addsd $dst, $src" %} 2193 ins_cost(150); 2194 ins_encode %{ 2195 __ addsd($dst$$XMMRegister, $src$$Address); 2196 %} 2197 ins_pipe(pipe_slow); 2198 %} 2199 2200 instruct addD_imm(regD dst, immD con) %{ 2201 predicate((UseSSE>=2) && (UseAVX == 0)); 2202 match(Set dst (AddD dst con)); 2203 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2204 ins_cost(150); 2205 ins_encode %{ 2206 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2207 %} 2208 ins_pipe(pipe_slow); 2209 %} 2210 2211 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2212 predicate(UseAVX > 0); 2213 match(Set dst (AddD src1 src2)); 2214 2215 format %{ "vaddsd $dst, $src1, $src2" %} 2216 ins_cost(150); 2217 ins_encode %{ 2218 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2219 %} 2220 ins_pipe(pipe_slow); 2221 %} 2222 2223 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2224 predicate(UseAVX > 0); 2225 match(Set dst (AddD src1 (LoadD src2))); 2226 2227 format %{ "vaddsd $dst, $src1, $src2" %} 2228 ins_cost(150); 2229 ins_encode %{ 2230 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2231 %} 2232 ins_pipe(pipe_slow); 2233 %} 2234 2235 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2236 predicate(UseAVX > 0); 2237 match(Set dst (AddD src con)); 2238 2239 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2240 ins_cost(150); 2241 ins_encode %{ 2242 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2243 %} 2244 ins_pipe(pipe_slow); 2245 %} 2246 2247 instruct subF_reg(regF dst, regF src) %{ 2248 predicate((UseSSE>=1) && (UseAVX == 0)); 2249 match(Set dst (SubF dst src)); 2250 2251 format %{ "subss $dst, $src" %} 2252 ins_cost(150); 2253 ins_encode %{ 2254 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2255 %} 2256 ins_pipe(pipe_slow); 2257 %} 2258 2259 instruct subF_mem(regF dst, memory src) %{ 2260 predicate((UseSSE>=1) && (UseAVX == 0)); 2261 match(Set dst (SubF dst (LoadF src))); 2262 2263 format %{ "subss $dst, $src" %} 2264 ins_cost(150); 2265 ins_encode %{ 2266 __ subss($dst$$XMMRegister, $src$$Address); 2267 %} 2268 ins_pipe(pipe_slow); 2269 %} 2270 2271 instruct subF_imm(regF dst, immF con) %{ 2272 predicate((UseSSE>=1) && (UseAVX == 0)); 2273 match(Set dst (SubF dst con)); 2274 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2275 ins_cost(150); 2276 ins_encode %{ 2277 __ subss($dst$$XMMRegister, $constantaddress($con)); 
2278 %} 2279 ins_pipe(pipe_slow); 2280 %} 2281 2282 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2283 predicate(UseAVX > 0); 2284 match(Set dst (SubF src1 src2)); 2285 2286 format %{ "vsubss $dst, $src1, $src2" %} 2287 ins_cost(150); 2288 ins_encode %{ 2289 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2290 %} 2291 ins_pipe(pipe_slow); 2292 %} 2293 2294 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2295 predicate(UseAVX > 0); 2296 match(Set dst (SubF src1 (LoadF src2))); 2297 2298 format %{ "vsubss $dst, $src1, $src2" %} 2299 ins_cost(150); 2300 ins_encode %{ 2301 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2302 %} 2303 ins_pipe(pipe_slow); 2304 %} 2305 2306 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2307 predicate(UseAVX > 0); 2308 match(Set dst (SubF src con)); 2309 2310 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2311 ins_cost(150); 2312 ins_encode %{ 2313 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2314 %} 2315 ins_pipe(pipe_slow); 2316 %} 2317 2318 instruct subD_reg(regD dst, regD src) %{ 2319 predicate((UseSSE>=2) && (UseAVX == 0)); 2320 match(Set dst (SubD dst src)); 2321 2322 format %{ "subsd $dst, $src" %} 2323 ins_cost(150); 2324 ins_encode %{ 2325 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2326 %} 2327 ins_pipe(pipe_slow); 2328 %} 2329 2330 instruct subD_mem(regD dst, memory src) %{ 2331 predicate((UseSSE>=2) && (UseAVX == 0)); 2332 match(Set dst (SubD dst (LoadD src))); 2333 2334 format %{ "subsd $dst, $src" %} 2335 ins_cost(150); 2336 ins_encode %{ 2337 __ subsd($dst$$XMMRegister, $src$$Address); 2338 %} 2339 ins_pipe(pipe_slow); 2340 %} 2341 2342 instruct subD_imm(regD dst, immD con) %{ 2343 predicate((UseSSE>=2) && (UseAVX == 0)); 2344 match(Set dst (SubD dst con)); 2345 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2346 ins_cost(150); 2347 ins_encode %{ 2348 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2349 %} 2350 ins_pipe(pipe_slow); 2351 %} 2352 2353 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2354 predicate(UseAVX > 0); 2355 match(Set dst (SubD src1 src2)); 2356 2357 format %{ "vsubsd $dst, $src1, $src2" %} 2358 ins_cost(150); 2359 ins_encode %{ 2360 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2361 %} 2362 ins_pipe(pipe_slow); 2363 %} 2364 2365 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2366 predicate(UseAVX > 0); 2367 match(Set dst (SubD src1 (LoadD src2))); 2368 2369 format %{ "vsubsd $dst, $src1, $src2" %} 2370 ins_cost(150); 2371 ins_encode %{ 2372 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2373 %} 2374 ins_pipe(pipe_slow); 2375 %} 2376 2377 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2378 predicate(UseAVX > 0); 2379 match(Set dst (SubD src con)); 2380 2381 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2382 ins_cost(150); 2383 ins_encode %{ 2384 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2385 %} 2386 ins_pipe(pipe_slow); 2387 %} 2388 2389 instruct mulF_reg(regF dst, regF src) %{ 2390 predicate((UseSSE>=1) && (UseAVX == 0)); 2391 match(Set dst (MulF dst src)); 2392 2393 format %{ "mulss $dst, $src" %} 2394 ins_cost(150); 2395 ins_encode %{ 2396 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2397 %} 2398 ins_pipe(pipe_slow); 2399 %} 2400 2401 instruct mulF_mem(regF dst, memory src) %{ 2402 
predicate((UseSSE>=1) && (UseAVX == 0)); 2403 match(Set dst (MulF dst (LoadF src))); 2404 2405 format %{ "mulss $dst, $src" %} 2406 ins_cost(150); 2407 ins_encode %{ 2408 __ mulss($dst$$XMMRegister, $src$$Address); 2409 %} 2410 ins_pipe(pipe_slow); 2411 %} 2412 2413 instruct mulF_imm(regF dst, immF con) %{ 2414 predicate((UseSSE>=1) && (UseAVX == 0)); 2415 match(Set dst (MulF dst con)); 2416 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2417 ins_cost(150); 2418 ins_encode %{ 2419 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2420 %} 2421 ins_pipe(pipe_slow); 2422 %} 2423 2424 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2425 predicate(UseAVX > 0); 2426 match(Set dst (MulF src1 src2)); 2427 2428 format %{ "vmulss $dst, $src1, $src2" %} 2429 ins_cost(150); 2430 ins_encode %{ 2431 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2432 %} 2433 ins_pipe(pipe_slow); 2434 %} 2435 2436 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 2437 predicate(UseAVX > 0); 2438 match(Set dst (MulF src1 (LoadF src2))); 2439 2440 format %{ "vmulss $dst, $src1, $src2" %} 2441 ins_cost(150); 2442 ins_encode %{ 2443 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2444 %} 2445 ins_pipe(pipe_slow); 2446 %} 2447 2448 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2449 predicate(UseAVX > 0); 2450 match(Set dst (MulF src con)); 2451 2452 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2453 ins_cost(150); 2454 ins_encode %{ 2455 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2456 %} 2457 ins_pipe(pipe_slow); 2458 %} 2459 2460 instruct mulD_reg(regD dst, regD src) %{ 2461 predicate((UseSSE>=2) && (UseAVX == 0)); 2462 match(Set dst (MulD dst src)); 2463 2464 format %{ "mulsd $dst, $src" %} 2465 ins_cost(150); 2466 ins_encode %{ 2467 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2468 %} 2469 ins_pipe(pipe_slow); 2470 %} 2471 2472 instruct mulD_mem(regD dst, memory src) %{ 2473 predicate((UseSSE>=2) && (UseAVX == 0)); 2474 match(Set dst (MulD dst (LoadD src))); 2475 2476 format %{ "mulsd $dst, $src" %} 2477 ins_cost(150); 2478 ins_encode %{ 2479 __ mulsd($dst$$XMMRegister, $src$$Address); 2480 %} 2481 ins_pipe(pipe_slow); 2482 %} 2483 2484 instruct mulD_imm(regD dst, immD con) %{ 2485 predicate((UseSSE>=2) && (UseAVX == 0)); 2486 match(Set dst (MulD dst con)); 2487 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2488 ins_cost(150); 2489 ins_encode %{ 2490 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2491 %} 2492 ins_pipe(pipe_slow); 2493 %} 2494 2495 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2496 predicate(UseAVX > 0); 2497 match(Set dst (MulD src1 src2)); 2498 2499 format %{ "vmulsd $dst, $src1, $src2" %} 2500 ins_cost(150); 2501 ins_encode %{ 2502 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2503 %} 2504 ins_pipe(pipe_slow); 2505 %} 2506 2507 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2508 predicate(UseAVX > 0); 2509 match(Set dst (MulD src1 (LoadD src2))); 2510 2511 format %{ "vmulsd $dst, $src1, $src2" %} 2512 ins_cost(150); 2513 ins_encode %{ 2514 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2515 %} 2516 ins_pipe(pipe_slow); 2517 %} 2518 2519 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2520 predicate(UseAVX > 0); 2521 match(Set dst (MulD src con)); 2522 2523 format %{ "vmulsd $dst, $src, 
[$constantaddress]\t# load from constant table: double=$con" %} 2524 ins_cost(150); 2525 ins_encode %{ 2526 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2527 %} 2528 ins_pipe(pipe_slow); 2529 %} 2530 2531 instruct divF_reg(regF dst, regF src) %{ 2532 predicate((UseSSE>=1) && (UseAVX == 0)); 2533 match(Set dst (DivF dst src)); 2534 2535 format %{ "divss $dst, $src" %} 2536 ins_cost(150); 2537 ins_encode %{ 2538 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2539 %} 2540 ins_pipe(pipe_slow); 2541 %} 2542 2543 instruct divF_mem(regF dst, memory src) %{ 2544 predicate((UseSSE>=1) && (UseAVX == 0)); 2545 match(Set dst (DivF dst (LoadF src))); 2546 2547 format %{ "divss $dst, $src" %} 2548 ins_cost(150); 2549 ins_encode %{ 2550 __ divss($dst$$XMMRegister, $src$$Address); 2551 %} 2552 ins_pipe(pipe_slow); 2553 %} 2554 2555 instruct divF_imm(regF dst, immF con) %{ 2556 predicate((UseSSE>=1) && (UseAVX == 0)); 2557 match(Set dst (DivF dst con)); 2558 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2559 ins_cost(150); 2560 ins_encode %{ 2561 __ divss($dst$$XMMRegister, $constantaddress($con)); 2562 %} 2563 ins_pipe(pipe_slow); 2564 %} 2565 2566 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2567 predicate(UseAVX > 0); 2568 match(Set dst (DivF src1 src2)); 2569 2570 format %{ "vdivss $dst, $src1, $src2" %} 2571 ins_cost(150); 2572 ins_encode %{ 2573 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2574 %} 2575 ins_pipe(pipe_slow); 2576 %} 2577 2578 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2579 predicate(UseAVX > 0); 2580 match(Set dst (DivF src1 (LoadF src2))); 2581 2582 format %{ "vdivss $dst, $src1, $src2" %} 2583 ins_cost(150); 2584 ins_encode %{ 2585 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2586 %} 2587 ins_pipe(pipe_slow); 2588 %} 2589 2590 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2591 predicate(UseAVX > 0); 2592 match(Set dst (DivF src con)); 2593 2594 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2595 ins_cost(150); 2596 ins_encode %{ 2597 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2598 %} 2599 ins_pipe(pipe_slow); 2600 %} 2601 2602 instruct divD_reg(regD dst, regD src) %{ 2603 predicate((UseSSE>=2) && (UseAVX == 0)); 2604 match(Set dst (DivD dst src)); 2605 2606 format %{ "divsd $dst, $src" %} 2607 ins_cost(150); 2608 ins_encode %{ 2609 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2610 %} 2611 ins_pipe(pipe_slow); 2612 %} 2613 2614 instruct divD_mem(regD dst, memory src) %{ 2615 predicate((UseSSE>=2) && (UseAVX == 0)); 2616 match(Set dst (DivD dst (LoadD src))); 2617 2618 format %{ "divsd $dst, $src" %} 2619 ins_cost(150); 2620 ins_encode %{ 2621 __ divsd($dst$$XMMRegister, $src$$Address); 2622 %} 2623 ins_pipe(pipe_slow); 2624 %} 2625 2626 instruct divD_imm(regD dst, immD con) %{ 2627 predicate((UseSSE>=2) && (UseAVX == 0)); 2628 match(Set dst (DivD dst con)); 2629 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2630 ins_cost(150); 2631 ins_encode %{ 2632 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2633 %} 2634 ins_pipe(pipe_slow); 2635 %} 2636 2637 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2638 predicate(UseAVX > 0); 2639 match(Set dst (DivD src1 src2)); 2640 2641 format %{ "vdivsd $dst, $src1, $src2" %} 2642 ins_cost(150); 2643 ins_encode %{ 2644 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, 
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

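// Worked example of the sign-mask constants used by the abs/neg patterns
// above and below: abs clears the IEEE-754 sign bit with AND, neg flips it
// with XOR, and neither raises FP exceptions or changes NaN payloads.
//   -10.0f = 0xC1200000;  0xC1200000 & 0x7FFFFFFF = 0x41200000 = 10.0f
//    10.0f = 0x41200000;  0x41200000 ^ 0x80000000 = 0xC1200000 = -10.0f
// The 64-bit masks 0x7FFFFFFFFFFFFFFF / 0x8000000000000000 do the same
// for doubles.
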
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"pause\t! membar_onspinwait"
    } else {
      $$emit$$"MEMBAR-onspinwait ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

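// Note on the FMA patterns: matching "Set c (FmaD c (Binary a b))" ties the
// accumulator input to the result, so c is overwritten with a * b + c in one
// fused operation with a single rounding step (the Math.fma contract).
// Assuming the usual FMA3 mapping in the macro assembler, this lands on the
// vfmadd231sd/vfmadd231ss forms, which accumulate into their destination.
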
// ====================VECTOR INSTRUCTIONS=====================================

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (4 bytes long)
instruct MoveVecS2Leg(legVecS dst, vecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (4 bytes long)
instruct MoveLeg2VecS(vecS dst, legVecS src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! move vector (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (8 bytes long)
instruct MoveVecD2Leg(legVecD dst, vecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (8 bytes long)
instruct MoveLeg2VecD(vecD dst, legVecD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! move vector (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (16 bytes long)
instruct MoveVecX2Leg(legVecX dst, vecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (16 bytes long)
instruct MoveLeg2VecX(vecX dst, legVecX src) %{
  match(Set dst src);
  format %{ "movdqu $dst,$src\t! move vector (16 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

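// Note on the MoveVec*2Leg/MoveLeg2Vec* pairs: the legVec* operand classes
// are restricted to registers that legacy SSE/VEX encodings can name, while
// the plain vec* classes may be assigned XMM16-XMM31 on AVX-512 targets.
// Those upper registers are reachable only through EVEX encodings, so when
// AVX-512 is present without the VL extension the moves fall back to a
// full-width evmovdquq (vector_len = 2); otherwise a plain 128/256-bit move
// suffices.
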
// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Move vectors (32 bytes long)
instruct MoveVecY2Leg(legVecY dst, vecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Move vectors (32 bytes long)
instruct MoveLeg2VecY(vecY dst, legVecY src) %{
  match(Set dst src);
  format %{ "vmovdqu $dst,$src\t! move vector (32 bytes)" %}
  ins_encode %{
    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
      int vector_len = 2;
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    } else {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveVecZ2Leg(legVecZ dst, vecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2VecZ(vecZ dst, legVecZ src) %{
  match(Set dst src);
  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

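// Note on vector_len: the encoder's vector_len argument selects the VEX/EVEX
// vector length, not a byte count: 0 = 128-bit (XMM), 1 = 256-bit (YMM),
// 2 = 512-bit (ZMM). The 64-byte accesses pick evmovdqul vs. evmovdquq by
// element size (<= 4 bytes vs. > 4 bytes); the element granularity only
// matters when a write-mask is applied, and these unmasked full-width moves
// copy all 64 bytes either way.
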
// Store vectors
instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE=======================================

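// The patterns below broadcast a scalar with a shuffle cascade instead of a
// single broadcast instruction, since pre-AVX-512 hardware lacks
// vpbroadcast* from a general register. Worked example for replicate16B
// with src = 0xAB:
//   movd        xmm,src      -> byte 0 = 0xAB
//   punpcklbw   xmm,xmm      -> word 0 = 0xABAB
//   pshuflw     xmm,xmm,0x00 -> low qword = 0xABABABABABABABAB
//   punpcklqdq  xmm,xmm      -> all 16 bytes = 0xAB
// vinserti128_high / vinserti64x4 then copy the low lanes into the upper
// 128/256 bits for the 32- and 64-byte variants.
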
instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B(legVecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm(legVecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

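// Note on replicate8_imm: the *_imm patterns above and below fold the
// broadcast at compile time. replicate8_imm(con, width) repeats the low
// `width` bytes of the immediate across a 64-bit constant-table entry, e.g.
// replicate8_imm(0x41, 1) = 0x4141414141414141, so a single movq plus
// punpcklqdq yields the fully replicated 128-bit value.
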
instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S(legVecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm(legVecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

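// Note on the pshufd selector: the imm8 holds four 2-bit source-lane
// indices, one per destination dword. 0x00 = 0b00_00_00_00 broadcasts
// dword 0 to all four lanes; 0x44 = 0b01_00_01_00 picks dwords 1,0,1,0,
// i.e. it duplicates the low 64-bit lane (used below to replicate a double).
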
instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I(legVecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm(legVecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L(legVecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

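// Note on the 32-bit (#else) variants: without 64-bit GPRs a long lives in a
// register pair, so the two halves are moved into XMM separately (movdl of
// the low register and, via HIGH_FROM_LOW, of its paired high register) and
// glued together with punpckldq before the usual qword broadcast. The TEMP
// effects keep the allocator from assigning dst or tmp on top of a
// still-needed input.
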
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm(legVecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F(legVecZ dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D(legVecZ dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem(legVecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\n\t"
            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================

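// Note on the *_zero patterns in this file: xor-ing a register with itself
// is the canonical x86 zeroing idiom; it needs no constant load and is
// recognized by the hardware as dependency-breaking, so a Replicate of the
// constant 0 never touches memory regardless of element type or width.
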
// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

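// Note on replicate4_imm: the 4-byte vectors use a 32-bit constant-table
// entry. replicate4_imm(con, width) repeats the low `width` bytes of the
// immediate across 4 bytes, e.g. replicate4_imm(0x41, 1) = 0x41414141,
// loaded with a single movdl.
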
// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, vlRegF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, vlRegD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================

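// The EVEX patterns below replace the shuffle cascades with
// single-instruction broadcasts: evpbroadcastb/w take a general-register
// source (an AVX-512-only form), while vpbroadcastb/w broadcast straight
// from memory. The byte/short forms additionally require AVX512BW, and the
// 128/256-bit variants require AVX512VL, which is why the predicates test
// supports_avx512vlbw() or supports_avx512bw() together with UseAVX > 2.
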
replicate32B" %} 4247 ins_encode %{ 4248 int vector_len = 1; 4249 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4250 %} 4251 ins_pipe( pipe_slow ); 4252 %} 4253 4254 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4255 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4256 match(Set dst (ReplicateB (LoadB mem))); 4257 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4258 ins_encode %{ 4259 int vector_len = 1; 4260 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4261 %} 4262 ins_pipe( pipe_slow ); 4263 %} 4264 4265 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4266 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4267 match(Set dst (ReplicateB src)); 4268 format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %} 4269 ins_encode %{ 4270 int vector_len = 2; 4271 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4272 %} 4273 ins_pipe( pipe_slow ); 4274 %} 4275 4276 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4277 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4278 match(Set dst (ReplicateB (LoadB mem))); 4279 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4280 ins_encode %{ 4281 int vector_len = 2; 4282 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4283 %} 4284 ins_pipe( pipe_slow ); 4285 %} 4286 4287 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4288 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4289 match(Set dst (ReplicateB con)); 4290 format %{ "movq $dst,[$constantaddress]\n\t" 4291 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4292 ins_encode %{ 4293 int vector_len = 0; 4294 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4295 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4296 %} 4297 ins_pipe( pipe_slow ); 4298 %} 4299 4300 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4301 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4302 match(Set dst (ReplicateB con)); 4303 format %{ "movq $dst,[$constantaddress]\n\t" 4304 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4305 ins_encode %{ 4306 int vector_len = 1; 4307 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4308 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4309 %} 4310 ins_pipe( pipe_slow ); 4311 %} 4312 4313 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4314 predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4315 match(Set dst (ReplicateB con)); 4316 format %{ "movq $dst,[$constantaddress]\n\t" 4317 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4318 ins_encode %{ 4319 int vector_len = 2; 4320 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4321 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4322 %} 4323 ins_pipe( pipe_slow ); 4324 %} 4325 4326 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4327 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4328 match(Set dst (ReplicateB zero)); 4329 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4330 ins_encode %{ 4331 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 
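    // (Editor's sketch: throughout this file vector_len selects the AVX/EVEX
    // operand width used by the assembler wrappers --
    // 0 = 128-bit xmm, 1 = 256-bit ymm, 2 = 512-bit zmm. Equivalently:
    //   static int vector_len_in_bytes(int vector_len) {
    //     return 16 << vector_len;   // 16, 32 or 64 bytes
    //   }
    // The helper name is illustrative only; it is not defined in this file.)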
4332 int vector_len = 2; 4333 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4334 %} 4335 ins_pipe( fpu_reg_reg ); 4336 %} 4337 4338 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4339 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4340 match(Set dst (ReplicateS src)); 4341 format %{ "evpbroadcastw $dst,$src\t! replicate4S" %} 4342 ins_encode %{ 4343 int vector_len = 0; 4344 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4345 %} 4346 ins_pipe( pipe_slow ); 4347 %} 4348 4349 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4350 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4351 match(Set dst (ReplicateS (LoadS mem))); 4352 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4353 ins_encode %{ 4354 int vector_len = 0; 4355 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4356 %} 4357 ins_pipe( pipe_slow ); 4358 %} 4359 4360 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4361 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4362 match(Set dst (ReplicateS src)); 4363 format %{ "evpbroadcastw $dst,$src\t! replicate8S" %} 4364 ins_encode %{ 4365 int vector_len = 0; 4366 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4367 %} 4368 ins_pipe( pipe_slow ); 4369 %} 4370 4371 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4372 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4373 match(Set dst (ReplicateS (LoadS mem))); 4374 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4375 ins_encode %{ 4376 int vector_len = 0; 4377 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4378 %} 4379 ins_pipe( pipe_slow ); 4380 %} 4381 4382 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4383 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4384 match(Set dst (ReplicateS src)); 4385 format %{ "evpbroadcastw $dst,$src\t! replicate16S" %} 4386 ins_encode %{ 4387 int vector_len = 1; 4388 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4389 %} 4390 ins_pipe( pipe_slow ); 4391 %} 4392 4393 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4394 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4395 match(Set dst (ReplicateS (LoadS mem))); 4396 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4397 ins_encode %{ 4398 int vector_len = 1; 4399 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4400 %} 4401 ins_pipe( pipe_slow ); 4402 %} 4403 4404 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4405 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4406 match(Set dst (ReplicateS src)); 4407 format %{ "evpbroadcastw $dst,$src\t! replicate32S" %} 4408 ins_encode %{ 4409 int vector_len = 2; 4410 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4411 %} 4412 ins_pipe( pipe_slow ); 4413 %} 4414 4415 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4416 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4417 match(Set dst (ReplicateS (LoadS mem))); 4418 format %{ "vpbroadcastw $dst,$mem\t! 
replicate32S" %} 4419 ins_encode %{ 4420 int vector_len = 2; 4421 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4422 %} 4423 ins_pipe( pipe_slow ); 4424 %} 4425 4426 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4427 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4428 match(Set dst (ReplicateS con)); 4429 format %{ "movq $dst,[$constantaddress]\n\t" 4430 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4431 ins_encode %{ 4432 int vector_len = 0; 4433 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4434 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4435 %} 4436 ins_pipe( pipe_slow ); 4437 %} 4438 4439 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4440 predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw()); 4441 match(Set dst (ReplicateS con)); 4442 format %{ "movq $dst,[$constantaddress]\n\t" 4443 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4444 ins_encode %{ 4445 int vector_len = 1; 4446 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4447 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4448 %} 4449 ins_pipe( pipe_slow ); 4450 %} 4451 4452 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4453 predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw()); 4454 match(Set dst (ReplicateS con)); 4455 format %{ "movq $dst,[$constantaddress]\n\t" 4456 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4457 ins_encode %{ 4458 int vector_len = 2; 4459 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4460 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4461 %} 4462 ins_pipe( pipe_slow ); 4463 %} 4464 4465 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4466 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4467 match(Set dst (ReplicateS zero)); 4468 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4469 ins_encode %{ 4470 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4471 int vector_len = 2; 4472 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4473 %} 4474 ins_pipe( fpu_reg_reg ); 4475 %} 4476 4477 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4478 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4479 match(Set dst (ReplicateI src)); 4480 format %{ "evpbroadcastd $dst,$src\t! replicate4I" %} 4481 ins_encode %{ 4482 int vector_len = 0; 4483 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4484 %} 4485 ins_pipe( pipe_slow ); 4486 %} 4487 4488 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4489 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4490 match(Set dst (ReplicateI (LoadI mem))); 4491 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4492 ins_encode %{ 4493 int vector_len = 0; 4494 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4495 %} 4496 ins_pipe( pipe_slow ); 4497 %} 4498 4499 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4500 predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4501 match(Set dst (ReplicateI src)); 4502 format %{ "evpbroadcastd $dst,$src\t! 
replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "evpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
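    // (Editor's note: xor-with-self is the canonical zeroing idiom. The
    // result is all-zero bits regardless of the register's prior contents,
    // and hardware treats the operation as dependency-breaking, which is why
    // the matched immI0 operand never needs to reach a register. Scalar
    // model, with an illustrative name:
    //   static unsigned zero_idiom(unsigned reg) {
    //     return reg ^ reg;   // always 0; old value of reg is irrelevant
    //   } )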
4588 int vector_len = 2; 4589 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4590 %} 4591 ins_pipe( fpu_reg_reg ); 4592 %} 4593 4594 // Replicate long (8 byte) scalar to be vector 4595 #ifdef _LP64 4596 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4597 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4598 match(Set dst (ReplicateL src)); 4599 format %{ "evpbroadcastq $dst,$src\t! replicate4L" %} 4600 ins_encode %{ 4601 int vector_len = 1; 4602 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4603 %} 4604 ins_pipe( pipe_slow ); 4605 %} 4606 4607 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4608 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4609 match(Set dst (ReplicateL src)); 4610 format %{ "evpbroadcastq $dst,$src\t! replicate8L" %} 4611 ins_encode %{ 4612 int vector_len = 2; 4613 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4614 %} 4615 ins_pipe( pipe_slow ); 4616 %} 4617 #else // _LP64 4618 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4619 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4620 match(Set dst (ReplicateL src)); 4621 effect(TEMP dst, USE src, TEMP tmp); 4622 format %{ "movdl $dst,$src.lo\n\t" 4623 "movdl $tmp,$src.hi\n\t" 4624 "punpckldq $dst,$tmp\n\t" 4625 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4626 ins_encode %{ 4627 int vector_len = 1; 4628 __ movdl($dst$$XMMRegister, $src$$Register); 4629 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4630 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4631 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4632 %} 4633 ins_pipe( pipe_slow ); 4634 %} 4635 4636 instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{ 4637 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4638 match(Set dst (ReplicateL src)); 4639 effect(TEMP dst, USE src, TEMP tmp); 4640 format %{ "movdl $dst,$src.lo\n\t" 4641 "movdl $tmp,$src.hi\n\t" 4642 "punpckldq $dst,$tmp\n\t" 4643 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4644 ins_encode %{ 4645 int vector_len = 2; 4646 __ movdl($dst$$XMMRegister, $src$$Register); 4647 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4648 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4649 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4650 %} 4651 ins_pipe( pipe_slow ); 4652 %} 4653 #endif // _LP64 4654 4655 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4656 predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl()); 4657 match(Set dst (ReplicateL con)); 4658 format %{ "movq $dst,[$constantaddress]\n\t" 4659 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4660 ins_encode %{ 4661 int vector_len = 1; 4662 __ movq($dst$$XMMRegister, $constantaddress($con)); 4663 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4664 %} 4665 ins_pipe( pipe_slow ); 4666 %} 4667 4668 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4669 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4670 match(Set dst (ReplicateL con)); 4671 format %{ "movq $dst,[$constantaddress]\n\t" 4672 "vpbroadcastq $dst,$dst\t! 
replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vpbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
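
// Editor's note (scalar sketch with illustrative names, not part of this
// file): the *_mem_evex rules match a Replicate whose input is still a Load
// node, so the scalar load is folded into the broadcast itself and no
// separate GPR/XMM round-trip is emitted:
//
//   static void replicate_f_mem(float* dst, const float* mem, int n) {
//     float s = *mem;                     // the folded LoadF
//     for (int i = 0; i < n; i++) {
//       dst[i] = s;                       // vbroadcastss dst, [mem]
//     }
//   }
//
// Matching the (ReplicateF (LoadF mem)) tree in one rule is what lets the
// instruction selector pick the memory-operand form of the broadcast.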

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================

instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! 
add reduction2I" %} 4849 ins_encode %{ 4850 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4851 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4852 __ movdl($tmp$$XMMRegister, $src1$$Register); 4853 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4854 __ movdl($dst$$Register, $tmp$$XMMRegister); 4855 %} 4856 ins_pipe( pipe_slow ); 4857 %} 4858 4859 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4860 predicate(VM_Version::supports_avxonly()); 4861 match(Set dst (AddReductionVI src1 src2)); 4862 effect(TEMP tmp, TEMP tmp2); 4863 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4864 "movd $tmp2,$src1\n\t" 4865 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4866 "movd $dst,$tmp2\t! add reduction2I" %} 4867 ins_encode %{ 4868 int vector_len = 0; 4869 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4870 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4871 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4872 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4873 %} 4874 ins_pipe( pipe_slow ); 4875 %} 4876 4877 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 4878 predicate(UseAVX > 2); 4879 match(Set dst (AddReductionVI src1 src2)); 4880 effect(TEMP tmp, TEMP tmp2); 4881 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4882 "vpaddd $tmp,$src2,$tmp2\n\t" 4883 "movd $tmp2,$src1\n\t" 4884 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4885 "movd $dst,$tmp2\t! add reduction2I" %} 4886 ins_encode %{ 4887 int vector_len = 0; 4888 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4889 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4890 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4891 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4892 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4893 %} 4894 ins_pipe( pipe_slow ); 4895 %} 4896 4897 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 4898 predicate(UseSSE > 2 && UseAVX == 0); 4899 match(Set dst (AddReductionVI src1 src2)); 4900 effect(TEMP tmp, TEMP tmp2); 4901 format %{ "movdqu $tmp,$src2\n\t" 4902 "phaddd $tmp,$tmp\n\t" 4903 "phaddd $tmp,$tmp\n\t" 4904 "movd $tmp2,$src1\n\t" 4905 "paddd $tmp2,$tmp\n\t" 4906 "movd $dst,$tmp2\t! add reduction4I" %} 4907 ins_encode %{ 4908 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4909 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4910 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4911 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4912 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4913 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4914 %} 4915 ins_pipe( pipe_slow ); 4916 %} 4917 4918 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 4919 predicate(VM_Version::supports_avxonly()); 4920 match(Set dst (AddReductionVI src1 src2)); 4921 effect(TEMP tmp, TEMP tmp2); 4922 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4923 "vphaddd $tmp,$tmp,$tmp\n\t" 4924 "movd $tmp2,$src1\n\t" 4925 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4926 "movd $dst,$tmp2\t! 
add reduction4I" %} 4927 ins_encode %{ 4928 int vector_len = 0; 4929 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4930 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4931 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4932 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4933 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4934 %} 4935 ins_pipe( pipe_slow ); 4936 %} 4937 4938 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 4939 predicate(UseAVX > 2); 4940 match(Set dst (AddReductionVI src1 src2)); 4941 effect(TEMP tmp, TEMP tmp2); 4942 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4943 "vpaddd $tmp,$src2,$tmp2\n\t" 4944 "pshufd $tmp2,$tmp,0x1\n\t" 4945 "vpaddd $tmp,$tmp,$tmp2\n\t" 4946 "movd $tmp2,$src1\n\t" 4947 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4948 "movd $dst,$tmp2\t! add reduction4I" %} 4949 ins_encode %{ 4950 int vector_len = 0; 4951 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4952 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4953 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4954 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4955 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4956 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4957 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4958 %} 4959 ins_pipe( pipe_slow ); 4960 %} 4961 4962 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 4963 predicate(VM_Version::supports_avxonly()); 4964 match(Set dst (AddReductionVI src1 src2)); 4965 effect(TEMP tmp, TEMP tmp2); 4966 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4967 "vphaddd $tmp,$tmp,$tmp2\n\t" 4968 "vextracti128_high $tmp2,$tmp\n\t" 4969 "vpaddd $tmp,$tmp,$tmp2\n\t" 4970 "movd $tmp2,$src1\n\t" 4971 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4972 "movd $dst,$tmp2\t! add reduction8I" %} 4973 ins_encode %{ 4974 int vector_len = 1; 4975 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4976 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4977 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4978 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4979 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4980 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4981 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4982 %} 4983 ins_pipe( pipe_slow ); 4984 %} 4985 4986 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{ 4987 predicate(UseAVX > 2); 4988 match(Set dst (AddReductionVI src1 src2)); 4989 effect(TEMP tmp, TEMP tmp2); 4990 format %{ "vextracti128_high $tmp,$src2\n\t" 4991 "vpaddd $tmp,$tmp,$src2\n\t" 4992 "pshufd $tmp2,$tmp,0xE\n\t" 4993 "vpaddd $tmp,$tmp,$tmp2\n\t" 4994 "pshufd $tmp2,$tmp,0x1\n\t" 4995 "vpaddd $tmp,$tmp,$tmp2\n\t" 4996 "movd $tmp2,$src1\n\t" 4997 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4998 "movd $dst,$tmp2\t! 
add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! 
add reduction4L" %} 5076 ins_encode %{ 5077 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5078 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5079 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5080 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5081 __ movdq($tmp$$XMMRegister, $src1$$Register); 5082 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5083 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5084 %} 5085 ins_pipe( pipe_slow ); 5086 %} 5087 5088 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5089 predicate(UseAVX > 2); 5090 match(Set dst (AddReductionVL src1 src2)); 5091 effect(TEMP tmp, TEMP tmp2); 5092 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5093 "vpaddq $tmp2,$tmp2,$src2\n\t" 5094 "vextracti128_high $tmp,$tmp2\n\t" 5095 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5096 "pshufd $tmp,$tmp2,0xE\n\t" 5097 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5098 "movdq $tmp,$src1\n\t" 5099 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5100 "movdq $dst,$tmp2\t! add reduction8L" %} 5101 ins_encode %{ 5102 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5103 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5104 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5105 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5106 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5107 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5108 __ movdq($tmp$$XMMRegister, $src1$$Register); 5109 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5110 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5111 %} 5112 ins_pipe( pipe_slow ); 5113 %} 5114 #endif 5115 5116 instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5117 predicate(UseSSE >= 1 && UseAVX == 0); 5118 match(Set dst (AddReductionVF dst src2)); 5119 effect(TEMP dst, TEMP tmp); 5120 format %{ "addss $dst,$src2\n\t" 5121 "pshufd $tmp,$src2,0x01\n\t" 5122 "addss $dst,$tmp\t! add reduction2F" %} 5123 ins_encode %{ 5124 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5125 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5126 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5127 %} 5128 ins_pipe( pipe_slow ); 5129 %} 5130 5131 instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5132 predicate(UseAVX > 0); 5133 match(Set dst (AddReductionVF dst src2)); 5134 effect(TEMP dst, TEMP tmp); 5135 format %{ "vaddss $dst,$dst,$src2\n\t" 5136 "pshufd $tmp,$src2,0x01\n\t" 5137 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5138 ins_encode %{ 5139 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5140 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5141 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5142 %} 5143 ins_pipe( pipe_slow ); 5144 %} 5145 5146 instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5147 predicate(UseSSE >= 1 && UseAVX == 0); 5148 match(Set dst (AddReductionVF dst src2)); 5149 effect(TEMP dst, TEMP tmp); 5150 format %{ "addss $dst,$src2\n\t" 5151 "pshufd $tmp,$src2,0x01\n\t" 5152 "addss $dst,$tmp\n\t" 5153 "pshufd $tmp,$src2,0x02\n\t" 5154 "addss $dst,$tmp\n\t" 5155 "pshufd $tmp,$src2,0x03\n\t" 5156 "addss $dst,$tmp\t! 
add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5211 ins_encode %{ 5212 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5213 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5214 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5215 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5216 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5217 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5218 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5219 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5220 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5221 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5222 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5223 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5224 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5225 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5226 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5227 %} 5228 ins_pipe( pipe_slow ); 5229 %} 5230 5231 instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5232 predicate(UseAVX > 2); 5233 match(Set dst (AddReductionVF dst src2)); 5234 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5235 format %{ "vaddss $dst,$dst,$src2\n\t" 5236 "pshufd $tmp,$src2,0x01\n\t" 5237 "vaddss $dst,$dst,$tmp\n\t" 5238 "pshufd $tmp,$src2,0x02\n\t" 5239 "vaddss $dst,$dst,$tmp\n\t" 5240 "pshufd $tmp,$src2,0x03\n\t" 5241 "vaddss $dst,$dst,$tmp\n\t" 5242 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5243 "vaddss $dst,$dst,$tmp2\n\t" 5244 "pshufd $tmp,$tmp2,0x01\n\t" 5245 "vaddss $dst,$dst,$tmp\n\t" 5246 "pshufd $tmp,$tmp2,0x02\n\t" 5247 "vaddss $dst,$dst,$tmp\n\t" 5248 "pshufd $tmp,$tmp2,0x03\n\t" 5249 "vaddss $dst,$dst,$tmp\n\t" 5250 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5251 "vaddss $dst,$dst,$tmp2\n\t" 5252 "pshufd $tmp,$tmp2,0x01\n\t" 5253 "vaddss $dst,$dst,$tmp\n\t" 5254 "pshufd $tmp,$tmp2,0x02\n\t" 5255 "vaddss $dst,$dst,$tmp\n\t" 5256 "pshufd $tmp,$tmp2,0x03\n\t" 5257 "vaddss $dst,$dst,$tmp\n\t" 5258 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5259 "vaddss $dst,$dst,$tmp2\n\t" 5260 "pshufd $tmp,$tmp2,0x01\n\t" 5261 "vaddss $dst,$dst,$tmp\n\t" 5262 "pshufd $tmp,$tmp2,0x02\n\t" 5263 "vaddss $dst,$dst,$tmp\n\t" 5264 "pshufd $tmp,$tmp2,0x03\n\t" 5265 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5266 ins_encode %{ 5267 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5268 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5269 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5270 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5271 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5272 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5273 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5274 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5275 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5276 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5277 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5278 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5279 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5280 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5281 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5282 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5283 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5284 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5285 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5286 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5287 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5288 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5289 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5290 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5291 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5292 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5293 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5294 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5295 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5296 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5297 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5298 %} 5299 ins_pipe( pipe_slow ); 5300 %} 5301 5302 instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5303 predicate(UseSSE >= 1 && UseAVX == 0); 5304 match(Set dst (AddReductionVD dst src2)); 5305 effect(TEMP tmp, TEMP dst); 5306 format %{ "addsd $dst,$src2\n\t" 5307 "pshufd $tmp,$src2,0xE\n\t" 5308 "addsd $dst,$tmp\t! add reduction2D" %} 5309 ins_encode %{ 5310 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5311 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5312 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5313 %} 5314 ins_pipe( pipe_slow ); 5315 %} 5316 5317 instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5318 predicate(UseAVX > 0); 5319 match(Set dst (AddReductionVD dst src2)); 5320 effect(TEMP tmp, TEMP dst); 5321 format %{ "vaddsd $dst,$dst,$src2\n\t" 5322 "pshufd $tmp,$src2,0xE\n\t" 5323 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5324 ins_encode %{ 5325 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5326 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5327 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5328 %} 5329 ins_pipe( pipe_slow ); 5330 %} 5331 5332 instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{ 5333 predicate(UseAVX > 0); 5334 match(Set dst (AddReductionVD dst src2)); 5335 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5336 format %{ "vaddsd $dst,$dst,$src2\n\t" 5337 "pshufd $tmp,$src2,0xE\n\t" 5338 "vaddsd $dst,$dst,$tmp\n\t" 5339 "vextractf128 $tmp2,$src2,0x1\n\t" 5340 "vaddsd $dst,$dst,$tmp2\n\t" 5341 "pshufd $tmp,$tmp2,0xE\n\t" 5342 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5343 ins_encode %{ 5344 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5345 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5346 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5347 __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5348 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5349 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5350 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5351 %} 5352 ins_pipe( pipe_slow ); 5353 %} 5354 5355 instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5356 predicate(UseAVX > 2); 5357 match(Set dst (AddReductionVD dst src2)); 5358 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5359 format %{ "vaddsd $dst,$dst,$src2\n\t" 5360 "pshufd $tmp,$src2,0xE\n\t" 5361 "vaddsd $dst,$dst,$tmp\n\t" 5362 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5363 "vaddsd $dst,$dst,$tmp2\n\t" 5364 "pshufd $tmp,$tmp2,0xE\n\t" 5365 "vaddsd $dst,$dst,$tmp\n\t" 5366 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5367 "vaddsd $dst,$dst,$tmp2\n\t" 5368 "pshufd $tmp,$tmp2,0xE\n\t" 5369 "vaddsd $dst,$dst,$tmp\n\t" 5370 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5371 "vaddsd $dst,$dst,$tmp2\n\t" 5372 "pshufd $tmp,$tmp2,0xE\n\t" 5373 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5374 ins_encode %{ 5375 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5376 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5377 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5378 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5379 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5380 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5381 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5382 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5383 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5384 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5385 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5386 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5387 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5388 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5389 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5390 %} 5391 ins_pipe( pipe_slow ); 5392 %} 5393 5394 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5395 predicate(UseSSE > 3 && UseAVX == 0); 5396 match(Set dst (MulReductionVI src1 src2)); 5397 effect(TEMP tmp, TEMP tmp2); 5398 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5399 "pmulld $tmp2,$src2\n\t" 5400 "movd $tmp,$src1\n\t" 5401 "pmulld $tmp2,$tmp\n\t" 5402 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5403 ins_encode %{ 5404 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5405 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5406 __ movdl($tmp$$XMMRegister, $src1$$Register); 5407 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5408 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5409 %} 5410 ins_pipe( pipe_slow ); 5411 %} 5412 5413 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{ 5414 predicate(UseAVX > 0); 5415 match(Set dst (MulReductionVI src1 src2)); 5416 effect(TEMP tmp, TEMP tmp2); 5417 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5418 "vpmulld $tmp,$src2,$tmp2\n\t" 5419 "movd $tmp2,$src1\n\t" 5420 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5421 "movd $dst,$tmp2\t! mul reduction2I" %} 5422 ins_encode %{ 5423 int vector_len = 0; 5424 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5425 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5426 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5427 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5428 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5429 %} 5430 ins_pipe( pipe_slow ); 5431 %} 5432 5433 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5434 predicate(UseSSE > 3 && UseAVX == 0); 5435 match(Set dst (MulReductionVI src1 src2)); 5436 effect(TEMP tmp, TEMP tmp2); 5437 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5438 "pmulld $tmp2,$src2\n\t" 5439 "pshufd $tmp,$tmp2,0x1\n\t" 5440 "pmulld $tmp2,$tmp\n\t" 5441 "movd $tmp,$src1\n\t" 5442 "pmulld $tmp2,$tmp\n\t" 5443 "movd $dst,$tmp2\t! mul reduction4I" %} 5444 ins_encode %{ 5445 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5446 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5447 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5448 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5449 __ movdl($tmp$$XMMRegister, $src1$$Register); 5450 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5451 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5452 %} 5453 ins_pipe( pipe_slow ); 5454 %} 5455 5456 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{ 5457 predicate(UseAVX > 0); 5458 match(Set dst (MulReductionVI src1 src2)); 5459 effect(TEMP tmp, TEMP tmp2); 5460 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5461 "vpmulld $tmp,$src2,$tmp2\n\t" 5462 "pshufd $tmp2,$tmp,0x1\n\t" 5463 "vpmulld $tmp,$tmp,$tmp2\n\t" 5464 "movd $tmp2,$src1\n\t" 5465 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5466 "movd $dst,$tmp2\t! 
mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
  predicate(UseAVX > 1);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! 
mul reduction16I" %} 5523 ins_encode %{ 5524 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5525 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5526 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5527 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5528 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5529 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5530 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5531 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5532 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5533 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5534 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5535 %} 5536 ins_pipe( pipe_slow ); 5537 %} 5538 5539 #ifdef _LP64 5540 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{ 5541 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5542 match(Set dst (MulReductionVL src1 src2)); 5543 effect(TEMP tmp, TEMP tmp2); 5544 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5545 "vpmullq $tmp,$src2,$tmp2\n\t" 5546 "movdq $tmp2,$src1\n\t" 5547 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5548 "movdq $dst,$tmp2\t! mul reduction2L" %} 5549 ins_encode %{ 5550 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5551 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5552 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5553 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5554 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5555 %} 5556 ins_pipe( pipe_slow ); 5557 %} 5558 5559 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{ 5560 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5561 match(Set dst (MulReductionVL src1 src2)); 5562 effect(TEMP tmp, TEMP tmp2); 5563 format %{ "vextracti128_high $tmp,$src2\n\t" 5564 "vpmullq $tmp2,$tmp,$src2\n\t" 5565 "pshufd $tmp,$tmp2,0xE\n\t" 5566 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5567 "movdq $tmp,$src1\n\t" 5568 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5569 "movdq $dst,$tmp2\t! mul reduction4L" %} 5570 ins_encode %{ 5571 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5572 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5573 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5574 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5575 __ movdq($tmp$$XMMRegister, $src1$$Register); 5576 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5577 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5578 %} 5579 ins_pipe( pipe_slow ); 5580 %} 5581 5582 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5583 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5584 match(Set dst (MulReductionVL src1 src2)); 5585 effect(TEMP tmp, TEMP tmp2); 5586 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5587 "vpmullq $tmp2,$tmp2,$src2\n\t" 5588 "vextracti128_high $tmp,$tmp2\n\t" 5589 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5590 "pshufd $tmp,$tmp2,0xE\n\t" 5591 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5592 "movdq $tmp,$src1\n\t" 5593 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5594 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5595 ins_encode %{ 5596 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5597 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5598 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5599 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5600 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5601 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5602 __ movdq($tmp$$XMMRegister, $src1$$Register); 5603 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5604 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5605 %} 5606 ins_pipe( pipe_slow ); 5607 %} 5608 #endif 5609 5610 instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{ 5611 predicate(UseSSE >= 1 && UseAVX == 0); 5612 match(Set dst (MulReductionVF dst src2)); 5613 effect(TEMP dst, TEMP tmp); 5614 format %{ "mulss $dst,$src2\n\t" 5615 "pshufd $tmp,$src2,0x01\n\t" 5616 "mulss $dst,$tmp\t! mul reduction2F" %} 5617 ins_encode %{ 5618 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5619 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5620 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5621 %} 5622 ins_pipe( pipe_slow ); 5623 %} 5624 5625 instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{ 5626 predicate(UseAVX > 0); 5627 match(Set dst (MulReductionVF dst src2)); 5628 effect(TEMP tmp, TEMP dst); 5629 format %{ "vmulss $dst,$dst,$src2\n\t" 5630 "pshufd $tmp,$src2,0x01\n\t" 5631 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5632 ins_encode %{ 5633 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5634 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5635 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5636 %} 5637 ins_pipe( pipe_slow ); 5638 %} 5639 5640 instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5641 predicate(UseSSE >= 1 && UseAVX == 0); 5642 match(Set dst (MulReductionVF dst src2)); 5643 effect(TEMP dst, TEMP tmp); 5644 format %{ "mulss $dst,$src2\n\t" 5645 "pshufd $tmp,$src2,0x01\n\t" 5646 "mulss $dst,$tmp\n\t" 5647 "pshufd $tmp,$src2,0x02\n\t" 5648 "mulss $dst,$tmp\n\t" 5649 "pshufd $tmp,$src2,0x03\n\t" 5650 "mulss $dst,$tmp\t! mul reduction4F" %} 5651 ins_encode %{ 5652 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5653 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5654 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5655 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5656 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5657 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5658 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5659 %} 5660 ins_pipe( pipe_slow ); 5661 %} 5662 5663 instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{ 5664 predicate(UseAVX > 0); 5665 match(Set dst (MulReductionVF dst src2)); 5666 effect(TEMP tmp, TEMP dst); 5667 format %{ "vmulss $dst,$dst,$src2\n\t" 5668 "pshufd $tmp,$src2,0x01\n\t" 5669 "vmulss $dst,$dst,$tmp\n\t" 5670 "pshufd $tmp,$src2,0x02\n\t" 5671 "vmulss $dst,$dst,$tmp\n\t" 5672 "pshufd $tmp,$src2,0x03\n\t" 5673 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5674 ins_encode %{ 5675 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5676 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5677 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5678 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5679 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5680 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5681 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5682 %} 5683 ins_pipe( pipe_slow ); 5684 %} 5685 5686 instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{ 5687 predicate(UseAVX > 0); 5688 match(Set dst (MulReductionVF dst src2)); 5689 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5690 format %{ "vmulss $dst,$dst,$src2\n\t" 5691 "pshufd $tmp,$src2,0x01\n\t" 5692 "vmulss $dst,$dst,$tmp\n\t" 5693 "pshufd $tmp,$src2,0x02\n\t" 5694 "vmulss $dst,$dst,$tmp\n\t" 5695 "pshufd $tmp,$src2,0x03\n\t" 5696 "vmulss $dst,$dst,$tmp\n\t" 5697 "vextractf128_high $tmp2,$src2\n\t" 5698 "vmulss $dst,$dst,$tmp2\n\t" 5699 "pshufd $tmp,$tmp2,0x01\n\t" 5700 "vmulss $dst,$dst,$tmp\n\t" 5701 "pshufd $tmp,$tmp2,0x02\n\t" 5702 "vmulss $dst,$dst,$tmp\n\t" 5703 "pshufd $tmp,$tmp2,0x03\n\t" 5704 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5705 ins_encode %{ 5706 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5707 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5708 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5709 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5710 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5711 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5712 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5713 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5714 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5715 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5716 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5717 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5718 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5719 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5720 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5721 %} 5722 ins_pipe( pipe_slow ); 5723 %} 5724 5725 instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5726 predicate(UseAVX > 2); 5727 match(Set dst (MulReductionVF dst src2)); 5728 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5729 format %{ "vmulss $dst,$dst,$src2\n\t" 5730 "pshufd $tmp,$src2,0x01\n\t" 5731 "vmulss $dst,$dst,$tmp\n\t" 5732 "pshufd $tmp,$src2,0x02\n\t" 5733 "vmulss $dst,$dst,$tmp\n\t" 5734 "pshufd $tmp,$src2,0x03\n\t" 5735 "vmulss $dst,$dst,$tmp\n\t" 5736 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5737 "vmulss $dst,$dst,$tmp2\n\t" 5738 "pshufd $tmp,$tmp2,0x01\n\t" 5739 "vmulss $dst,$dst,$tmp\n\t" 5740 "pshufd $tmp,$tmp2,0x02\n\t" 5741 "vmulss $dst,$dst,$tmp\n\t" 5742 "pshufd $tmp,$tmp2,0x03\n\t" 5743 "vmulss $dst,$dst,$tmp\n\t" 5744 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5745 "vmulss $dst,$dst,$tmp2\n\t" 5746 "pshufd $tmp,$tmp2,0x01\n\t" 5747 "vmulss $dst,$dst,$tmp\n\t" 5748 "pshufd $tmp,$tmp2,0x02\n\t" 5749 "vmulss $dst,$dst,$tmp\n\t" 5750 "pshufd $tmp,$tmp2,0x03\n\t" 5751 "vmulss $dst,$dst,$tmp\n\t" 5752 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5753 "vmulss $dst,$dst,$tmp2\n\t" 5754 "pshufd $tmp,$tmp2,0x01\n\t" 5755 "vmulss $dst,$dst,$tmp\n\t" 5756 "pshufd 
$tmp,$tmp2,0x02\n\t" 5757 "vmulss $dst,$dst,$tmp\n\t" 5758 "pshufd $tmp,$tmp2,0x03\n\t" 5759 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5760 ins_encode %{ 5761 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5762 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5763 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5764 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5765 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5766 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5767 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5768 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5769 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5770 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5771 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5772 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5773 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5774 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5775 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5776 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5777 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5778 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5779 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5780 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5781 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5782 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5783 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5784 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5785 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5786 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5787 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5788 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5789 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5790 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5791 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5792 %} 5793 ins_pipe( pipe_slow ); 5794 %} 5795 5796 instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5797 predicate(UseSSE >= 1 && UseAVX == 0); 5798 match(Set dst (MulReductionVD dst src2)); 5799 effect(TEMP dst, TEMP tmp); 5800 format %{ "mulsd $dst,$src2\n\t" 5801 "pshufd $tmp,$src2,0xE\n\t" 5802 "mulsd $dst,$tmp\t! mul reduction2D" %} 5803 ins_encode %{ 5804 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5805 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5806 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5807 %} 5808 ins_pipe( pipe_slow ); 5809 %} 5810 5811 instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{ 5812 predicate(UseAVX > 0); 5813 match(Set dst (MulReductionVD dst src2)); 5814 effect(TEMP tmp, TEMP dst); 5815 format %{ "vmulsd $dst,$dst,$src2\n\t" 5816 "pshufd $tmp,$src2,0xE\n\t" 5817 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %} 5818 ins_encode %{ 5819 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5820 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5821 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5822 %} 5823 ins_pipe( pipe_slow ); 5824 %} 5825 5826 instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{ 5827 predicate(UseAVX > 0); 5828 match(Set dst (MulReductionVD dst src2)); 5829 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5830 format %{ "vmulsd $dst,$dst,$src2\n\t" 5831 "pshufd $tmp,$src2,0xE\n\t" 5832 "vmulsd $dst,$dst,$tmp\n\t" 5833 "vextractf128_high $tmp2,$src2\n\t" 5834 "vmulsd $dst,$dst,$tmp2\n\t" 5835 "pshufd $tmp,$tmp2,0xE\n\t" 5836 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5837 ins_encode %{ 5838 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5839 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5840 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5841 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5842 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5843 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5844 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5845 %} 5846 ins_pipe( pipe_slow ); 5847 %} 5848 5849 instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{ 5850 predicate(UseAVX > 2); 5851 match(Set dst (MulReductionVD dst src2)); 5852 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5853 format %{ "vmulsd $dst,$dst,$src2\n\t" 5854 "pshufd $tmp,$src2,0xE\n\t" 5855 "vmulsd $dst,$dst,$tmp\n\t" 5856 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5857 "vmulsd $dst,$dst,$tmp2\n\t" 5858 "pshufd $tmp,$src2,0xE\n\t" 5859 "vmulsd $dst,$dst,$tmp\n\t" 5860 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5861 "vmulsd $dst,$dst,$tmp2\n\t" 5862 "pshufd $tmp,$tmp2,0xE\n\t" 5863 "vmulsd $dst,$dst,$tmp\n\t" 5864 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5865 "vmulsd $dst,$dst,$tmp2\n\t" 5866 "pshufd $tmp,$tmp2,0xE\n\t" 5867 "vmulsd $dst,$dst,$tmp\t! 
mul reduction8D" %} 5868 ins_encode %{ 5869 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5870 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5871 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5872 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5873 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5874 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5875 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5876 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5877 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5878 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5879 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5880 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5881 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5882 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5883 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5884 %} 5885 ins_pipe( pipe_slow ); 5886 %} 5887 5888 // ====================VECTOR ARITHMETIC======================================= 5889 5890 // --------------------------------- ADD -------------------------------------- 5891 5892 // Bytes vector add 5893 instruct vadd4B(vecS dst, vecS src) %{ 5894 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5895 match(Set dst (AddVB dst src)); 5896 format %{ "paddb $dst,$src\t! add packed4B" %} 5897 ins_encode %{ 5898 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5899 %} 5900 ins_pipe( pipe_slow ); 5901 %} 5902 5903 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 5904 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5905 match(Set dst (AddVB src1 src2)); 5906 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5907 ins_encode %{ 5908 int vector_len = 0; 5909 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5910 %} 5911 ins_pipe( pipe_slow ); 5912 %} 5913 5914 5915 instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ 5916 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 5917 match(Set dst (AddVB src (LoadVector mem))); 5918 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5919 ins_encode %{ 5920 int vector_len = 0; 5921 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5922 %} 5923 ins_pipe( pipe_slow ); 5924 %} 5925 5926 instruct vadd8B(vecD dst, vecD src) %{ 5927 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 5928 match(Set dst (AddVB dst src)); 5929 format %{ "paddb $dst,$src\t! add packed8B" %} 5930 ins_encode %{ 5931 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5932 %} 5933 ins_pipe( pipe_slow ); 5934 %} 5935 5936 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 5937 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5938 match(Set dst (AddVB src1 src2)); 5939 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5940 ins_encode %{ 5941 int vector_len = 0; 5942 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5943 %} 5944 ins_pipe( pipe_slow ); 5945 %} 5946 5947 5948 instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ 5949 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 5950 match(Set dst (AddVB src (LoadVector mem))); 5951 format %{ "vpaddb $dst,$src,$mem\t! 
add packed8B" %} 5952 ins_encode %{ 5953 int vector_len = 0; 5954 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5955 %} 5956 ins_pipe( pipe_slow ); 5957 %} 5958 5959 instruct vadd16B(vecX dst, vecX src) %{ 5960 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 5961 match(Set dst (AddVB dst src)); 5962 format %{ "paddb $dst,$src\t! add packed16B" %} 5963 ins_encode %{ 5964 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5965 %} 5966 ins_pipe( pipe_slow ); 5967 %} 5968 5969 instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ 5970 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5971 match(Set dst (AddVB src1 src2)); 5972 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5973 ins_encode %{ 5974 int vector_len = 0; 5975 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5976 %} 5977 ins_pipe( pipe_slow ); 5978 %} 5979 5980 instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{ 5981 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 5982 match(Set dst (AddVB src (LoadVector mem))); 5983 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5984 ins_encode %{ 5985 int vector_len = 0; 5986 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5987 %} 5988 ins_pipe( pipe_slow ); 5989 %} 5990 5991 instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{ 5992 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 5993 match(Set dst (AddVB src1 src2)); 5994 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5995 ins_encode %{ 5996 int vector_len = 1; 5997 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5998 %} 5999 ins_pipe( pipe_slow ); 6000 %} 6001 6002 instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{ 6003 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6004 match(Set dst (AddVB src (LoadVector mem))); 6005 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 6006 ins_encode %{ 6007 int vector_len = 1; 6008 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6009 %} 6010 ins_pipe( pipe_slow ); 6011 %} 6012 6013 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6014 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6015 match(Set dst (AddVB src1 src2)); 6016 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 6017 ins_encode %{ 6018 int vector_len = 2; 6019 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6020 %} 6021 ins_pipe( pipe_slow ); 6022 %} 6023 6024 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 6025 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6026 match(Set dst (AddVB src (LoadVector mem))); 6027 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 6028 ins_encode %{ 6029 int vector_len = 2; 6030 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6031 %} 6032 ins_pipe( pipe_slow ); 6033 %} 6034 6035 // Shorts/Chars vector add 6036 instruct vadd2S(vecS dst, vecS src) %{ 6037 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6038 match(Set dst (AddVS dst src)); 6039 format %{ "paddw $dst,$src\t! 
add packed2S" %} 6040 ins_encode %{ 6041 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 6042 %} 6043 ins_pipe( pipe_slow ); 6044 %} 6045 6046 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 6047 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6048 match(Set dst (AddVS src1 src2)); 6049 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 6050 ins_encode %{ 6051 int vector_len = 0; 6052 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6053 %} 6054 ins_pipe( pipe_slow ); 6055 %} 6056 6057 instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ 6058 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6059 match(Set dst (AddVS src (LoadVector mem))); 6060 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 6061 ins_encode %{ 6062 int vector_len = 0; 6063 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6064 %} 6065 ins_pipe( pipe_slow ); 6066 %} 6067 6068 instruct vadd4S(vecD dst, vecD src) %{ 6069 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6070 match(Set dst (AddVS dst src)); 6071 format %{ "paddw $dst,$src\t! add packed4S" %} 6072 ins_encode %{ 6073 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 6074 %} 6075 ins_pipe( pipe_slow ); 6076 %} 6077 6078 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 6079 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6080 match(Set dst (AddVS src1 src2)); 6081 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 6082 ins_encode %{ 6083 int vector_len = 0; 6084 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6085 %} 6086 ins_pipe( pipe_slow ); 6087 %} 6088 6089 instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ 6090 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6091 match(Set dst (AddVS src (LoadVector mem))); 6092 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 6093 ins_encode %{ 6094 int vector_len = 0; 6095 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6096 %} 6097 ins_pipe( pipe_slow ); 6098 %} 6099 6100 instruct vadd8S(vecX dst, vecX src) %{ 6101 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6102 match(Set dst (AddVS dst src)); 6103 format %{ "paddw $dst,$src\t! add packed8S" %} 6104 ins_encode %{ 6105 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 6106 %} 6107 ins_pipe( pipe_slow ); 6108 %} 6109 6110 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 6111 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6112 match(Set dst (AddVS src1 src2)); 6113 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 6114 ins_encode %{ 6115 int vector_len = 0; 6116 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6117 %} 6118 ins_pipe( pipe_slow ); 6119 %} 6120 6121 instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{ 6122 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6123 match(Set dst (AddVS src (LoadVector mem))); 6124 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 6125 ins_encode %{ 6126 int vector_len = 0; 6127 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6128 %} 6129 ins_pipe( pipe_slow ); 6130 %} 6131 6132 instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{ 6133 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6134 match(Set dst (AddVS src1 src2)); 6135 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed16S" %} 6136 ins_encode %{ 6137 int vector_len = 1; 6138 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6139 %} 6140 ins_pipe( pipe_slow ); 6141 %} 6142 6143 instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{ 6144 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6145 match(Set dst (AddVS src (LoadVector mem))); 6146 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 6147 ins_encode %{ 6148 int vector_len = 1; 6149 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6150 %} 6151 ins_pipe( pipe_slow ); 6152 %} 6153 6154 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6155 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6156 match(Set dst (AddVS src1 src2)); 6157 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} 6158 ins_encode %{ 6159 int vector_len = 2; 6160 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6161 %} 6162 ins_pipe( pipe_slow ); 6163 %} 6164 6165 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 6166 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6167 match(Set dst (AddVS src (LoadVector mem))); 6168 format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %} 6169 ins_encode %{ 6170 int vector_len = 2; 6171 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6172 %} 6173 ins_pipe( pipe_slow ); 6174 %} 6175 6176 // Integers vector add 6177 instruct vadd2I(vecD dst, vecD src) %{ 6178 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6179 match(Set dst (AddVI dst src)); 6180 format %{ "paddd $dst,$src\t! add packed2I" %} 6181 ins_encode %{ 6182 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6183 %} 6184 ins_pipe( pipe_slow ); 6185 %} 6186 6187 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 6188 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6189 match(Set dst (AddVI src1 src2)); 6190 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 6191 ins_encode %{ 6192 int vector_len = 0; 6193 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6194 %} 6195 ins_pipe( pipe_slow ); 6196 %} 6197 6198 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 6199 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6200 match(Set dst (AddVI src (LoadVector mem))); 6201 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 6202 ins_encode %{ 6203 int vector_len = 0; 6204 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6205 %} 6206 ins_pipe( pipe_slow ); 6207 %} 6208 6209 instruct vadd4I(vecX dst, vecX src) %{ 6210 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6211 match(Set dst (AddVI dst src)); 6212 format %{ "paddd $dst,$src\t! add packed4I" %} 6213 ins_encode %{ 6214 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6215 %} 6216 ins_pipe( pipe_slow ); 6217 %} 6218 6219 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 6220 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6221 match(Set dst (AddVI src1 src2)); 6222 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed4I" %} 6223 ins_encode %{ 6224 int vector_len = 0; 6225 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6226 %} 6227 ins_pipe( pipe_slow ); 6228 %} 6229 6230 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 6231 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6232 match(Set dst (AddVI src (LoadVector mem))); 6233 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 6234 ins_encode %{ 6235 int vector_len = 0; 6236 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6237 %} 6238 ins_pipe( pipe_slow ); 6239 %} 6240 6241 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 6242 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6243 match(Set dst (AddVI src1 src2)); 6244 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 6245 ins_encode %{ 6246 int vector_len = 1; 6247 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6248 %} 6249 ins_pipe( pipe_slow ); 6250 %} 6251 6252 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 6253 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6254 match(Set dst (AddVI src (LoadVector mem))); 6255 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 6256 ins_encode %{ 6257 int vector_len = 1; 6258 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6259 %} 6260 ins_pipe( pipe_slow ); 6261 %} 6262 6263 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6264 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6265 match(Set dst (AddVI src1 src2)); 6266 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} 6267 ins_encode %{ 6268 int vector_len = 2; 6269 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6270 %} 6271 ins_pipe( pipe_slow ); 6272 %} 6273 6274 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 6275 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6276 match(Set dst (AddVI src (LoadVector mem))); 6277 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 6278 ins_encode %{ 6279 int vector_len = 2; 6280 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6281 %} 6282 ins_pipe( pipe_slow ); 6283 %} 6284 6285 // Longs vector add 6286 instruct vadd2L(vecX dst, vecX src) %{ 6287 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6288 match(Set dst (AddVL dst src)); 6289 format %{ "paddq $dst,$src\t! add packed2L" %} 6290 ins_encode %{ 6291 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 6292 %} 6293 ins_pipe( pipe_slow ); 6294 %} 6295 6296 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 6297 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6298 match(Set dst (AddVL src1 src2)); 6299 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 6300 ins_encode %{ 6301 int vector_len = 0; 6302 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6303 %} 6304 ins_pipe( pipe_slow ); 6305 %} 6306 6307 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 6308 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6309 match(Set dst (AddVL src (LoadVector mem))); 6310 format %{ "vpaddq $dst,$src,$mem\t! 
add packed2L" %} 6311 ins_encode %{ 6312 int vector_len = 0; 6313 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6314 %} 6315 ins_pipe( pipe_slow ); 6316 %} 6317 6318 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 6319 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6320 match(Set dst (AddVL src1 src2)); 6321 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 6322 ins_encode %{ 6323 int vector_len = 1; 6324 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6325 %} 6326 ins_pipe( pipe_slow ); 6327 %} 6328 6329 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 6330 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6331 match(Set dst (AddVL src (LoadVector mem))); 6332 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 6333 ins_encode %{ 6334 int vector_len = 1; 6335 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6336 %} 6337 ins_pipe( pipe_slow ); 6338 %} 6339 6340 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6341 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6342 match(Set dst (AddVL src1 src2)); 6343 format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} 6344 ins_encode %{ 6345 int vector_len = 2; 6346 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6347 %} 6348 ins_pipe( pipe_slow ); 6349 %} 6350 6351 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ 6352 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6353 match(Set dst (AddVL src (LoadVector mem))); 6354 format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} 6355 ins_encode %{ 6356 int vector_len = 2; 6357 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6358 %} 6359 ins_pipe( pipe_slow ); 6360 %} 6361 6362 // Floats vector add 6363 instruct vadd2F(vecD dst, vecD src) %{ 6364 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6365 match(Set dst (AddVF dst src)); 6366 format %{ "addps $dst,$src\t! add packed2F" %} 6367 ins_encode %{ 6368 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6369 %} 6370 ins_pipe( pipe_slow ); 6371 %} 6372 6373 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 6374 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6375 match(Set dst (AddVF src1 src2)); 6376 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 6377 ins_encode %{ 6378 int vector_len = 0; 6379 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6380 %} 6381 ins_pipe( pipe_slow ); 6382 %} 6383 6384 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 6385 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6386 match(Set dst (AddVF src (LoadVector mem))); 6387 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 6388 ins_encode %{ 6389 int vector_len = 0; 6390 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6391 %} 6392 ins_pipe( pipe_slow ); 6393 %} 6394 6395 instruct vadd4F(vecX dst, vecX src) %{ 6396 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6397 match(Set dst (AddVF dst src)); 6398 format %{ "addps $dst,$src\t! add packed4F" %} 6399 ins_encode %{ 6400 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6401 %} 6402 ins_pipe( pipe_slow ); 6403 %} 6404 6405 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 6406 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6407 match(Set dst (AddVF src1 src2)); 6408 format %{ "vaddps $dst,$src1,$src2\t! 
add packed4F" %} 6409 ins_encode %{ 6410 int vector_len = 0; 6411 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 6417 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6418 match(Set dst (AddVF src (LoadVector mem))); 6419 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 6420 ins_encode %{ 6421 int vector_len = 0; 6422 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6423 %} 6424 ins_pipe( pipe_slow ); 6425 %} 6426 6427 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 6428 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6429 match(Set dst (AddVF src1 src2)); 6430 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 6431 ins_encode %{ 6432 int vector_len = 1; 6433 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6434 %} 6435 ins_pipe( pipe_slow ); 6436 %} 6437 6438 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 6439 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6440 match(Set dst (AddVF src (LoadVector mem))); 6441 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 6442 ins_encode %{ 6443 int vector_len = 1; 6444 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6445 %} 6446 ins_pipe( pipe_slow ); 6447 %} 6448 6449 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6450 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6451 match(Set dst (AddVF src1 src2)); 6452 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} 6453 ins_encode %{ 6454 int vector_len = 2; 6455 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6456 %} 6457 ins_pipe( pipe_slow ); 6458 %} 6459 6460 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 6461 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6462 match(Set dst (AddVF src (LoadVector mem))); 6463 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 6464 ins_encode %{ 6465 int vector_len = 2; 6466 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6467 %} 6468 ins_pipe( pipe_slow ); 6469 %} 6470 6471 // Doubles vector add 6472 instruct vadd2D(vecX dst, vecX src) %{ 6473 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6474 match(Set dst (AddVD dst src)); 6475 format %{ "addpd $dst,$src\t! add packed2D" %} 6476 ins_encode %{ 6477 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 6478 %} 6479 ins_pipe( pipe_slow ); 6480 %} 6481 6482 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 6483 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6484 match(Set dst (AddVD src1 src2)); 6485 format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %} 6486 ins_encode %{ 6487 int vector_len = 0; 6488 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6489 %} 6490 ins_pipe( pipe_slow ); 6491 %} 6492 6493 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 6494 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6495 match(Set dst (AddVD src (LoadVector mem))); 6496 format %{ "vaddpd $dst,$src,$mem\t! 
add packed2D" %} 6497 ins_encode %{ 6498 int vector_len = 0; 6499 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6500 %} 6501 ins_pipe( pipe_slow ); 6502 %} 6503 6504 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 6505 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6506 match(Set dst (AddVD src1 src2)); 6507 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 6508 ins_encode %{ 6509 int vector_len = 1; 6510 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6511 %} 6512 ins_pipe( pipe_slow ); 6513 %} 6514 6515 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 6516 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6517 match(Set dst (AddVD src (LoadVector mem))); 6518 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 6519 ins_encode %{ 6520 int vector_len = 1; 6521 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6522 %} 6523 ins_pipe( pipe_slow ); 6524 %} 6525 6526 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6527 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6528 match(Set dst (AddVD src1 src2)); 6529 format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} 6530 ins_encode %{ 6531 int vector_len = 2; 6532 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6533 %} 6534 ins_pipe( pipe_slow ); 6535 %} 6536 6537 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 6538 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6539 match(Set dst (AddVD src (LoadVector mem))); 6540 format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} 6541 ins_encode %{ 6542 int vector_len = 2; 6543 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6544 %} 6545 ins_pipe( pipe_slow ); 6546 %} 6547 6548 // --------------------------------- SUB -------------------------------------- 6549 6550 // Bytes vector sub 6551 instruct vsub4B(vecS dst, vecS src) %{ 6552 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6553 match(Set dst (SubVB dst src)); 6554 format %{ "psubb $dst,$src\t! sub packed4B" %} 6555 ins_encode %{ 6556 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6557 %} 6558 ins_pipe( pipe_slow ); 6559 %} 6560 6561 instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{ 6562 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6563 match(Set dst (SubVB src1 src2)); 6564 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6565 ins_encode %{ 6566 int vector_len = 0; 6567 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6568 %} 6569 ins_pipe( pipe_slow ); 6570 %} 6571 6572 instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ 6573 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6574 match(Set dst (SubVB src (LoadVector mem))); 6575 format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} 6576 ins_encode %{ 6577 int vector_len = 0; 6578 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6579 %} 6580 ins_pipe( pipe_slow ); 6581 %} 6582 6583 instruct vsub8B(vecD dst, vecD src) %{ 6584 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6585 match(Set dst (SubVB dst src)); 6586 format %{ "psubb $dst,$src\t! sub packed8B" %} 6587 ins_encode %{ 6588 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6589 %} 6590 ins_pipe( pipe_slow ); 6591 %} 6592 6593 instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ 6594 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6595 match(Set dst (SubVB src1 src2)); 6596 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed8B" %} 6597 ins_encode %{ 6598 int vector_len = 0; 6599 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6600 %} 6601 ins_pipe( pipe_slow ); 6602 %} 6603 6604 instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ 6605 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6606 match(Set dst (SubVB src (LoadVector mem))); 6607 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6608 ins_encode %{ 6609 int vector_len = 0; 6610 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6611 %} 6612 ins_pipe( pipe_slow ); 6613 %} 6614 6615 instruct vsub16B(vecX dst, vecX src) %{ 6616 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 6617 match(Set dst (SubVB dst src)); 6618 format %{ "psubb $dst,$src\t! sub packed16B" %} 6619 ins_encode %{ 6620 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6621 %} 6622 ins_pipe( pipe_slow ); 6623 %} 6624 6625 instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ 6626 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6627 match(Set dst (SubVB src1 src2)); 6628 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6629 ins_encode %{ 6630 int vector_len = 0; 6631 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6632 %} 6633 ins_pipe( pipe_slow ); 6634 %} 6635 6636 instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{ 6637 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 6638 match(Set dst (SubVB src (LoadVector mem))); 6639 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 6640 ins_encode %{ 6641 int vector_len = 0; 6642 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6643 %} 6644 ins_pipe( pipe_slow ); 6645 %} 6646 6647 instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{ 6648 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6649 match(Set dst (SubVB src1 src2)); 6650 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 6651 ins_encode %{ 6652 int vector_len = 1; 6653 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6654 %} 6655 ins_pipe( pipe_slow ); 6656 %} 6657 6658 instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{ 6659 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 6660 match(Set dst (SubVB src (LoadVector mem))); 6661 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 6662 ins_encode %{ 6663 int vector_len = 1; 6664 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6665 %} 6666 ins_pipe( pipe_slow ); 6667 %} 6668 6669 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6670 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6671 match(Set dst (SubVB src1 src2)); 6672 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 6673 ins_encode %{ 6674 int vector_len = 2; 6675 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6676 %} 6677 ins_pipe( pipe_slow ); 6678 %} 6679 6680 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 6681 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 6682 match(Set dst (SubVB src (LoadVector mem))); 6683 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed64B" %} 6684 ins_encode %{ 6685 int vector_len = 2; 6686 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6687 %} 6688 ins_pipe( pipe_slow ); 6689 %} 6690 6691 // Shorts/Chars vector sub 6692 instruct vsub2S(vecS dst, vecS src) %{ 6693 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6694 match(Set dst (SubVS dst src)); 6695 format %{ "psubw $dst,$src\t! sub packed2S" %} 6696 ins_encode %{ 6697 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6698 %} 6699 ins_pipe( pipe_slow ); 6700 %} 6701 6702 instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{ 6703 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6704 match(Set dst (SubVS src1 src2)); 6705 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 6706 ins_encode %{ 6707 int vector_len = 0; 6708 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6709 %} 6710 ins_pipe( pipe_slow ); 6711 %} 6712 6713 instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ 6714 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6715 match(Set dst (SubVS src (LoadVector mem))); 6716 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 6717 ins_encode %{ 6718 int vector_len = 0; 6719 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6720 %} 6721 ins_pipe( pipe_slow ); 6722 %} 6723 6724 instruct vsub4S(vecD dst, vecD src) %{ 6725 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6726 match(Set dst (SubVS dst src)); 6727 format %{ "psubw $dst,$src\t! sub packed4S" %} 6728 ins_encode %{ 6729 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6730 %} 6731 ins_pipe( pipe_slow ); 6732 %} 6733 6734 instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ 6735 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6736 match(Set dst (SubVS src1 src2)); 6737 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 6738 ins_encode %{ 6739 int vector_len = 0; 6740 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6741 %} 6742 ins_pipe( pipe_slow ); 6743 %} 6744 6745 instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ 6746 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6747 match(Set dst (SubVS src (LoadVector mem))); 6748 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 6749 ins_encode %{ 6750 int vector_len = 0; 6751 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6752 %} 6753 ins_pipe( pipe_slow ); 6754 %} 6755 6756 instruct vsub8S(vecX dst, vecX src) %{ 6757 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 6758 match(Set dst (SubVS dst src)); 6759 format %{ "psubw $dst,$src\t! sub packed8S" %} 6760 ins_encode %{ 6761 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 6762 %} 6763 ins_pipe( pipe_slow ); 6764 %} 6765 6766 instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{ 6767 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6768 match(Set dst (SubVS src1 src2)); 6769 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 6770 ins_encode %{ 6771 int vector_len = 0; 6772 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6773 %} 6774 ins_pipe( pipe_slow ); 6775 %} 6776 6777 instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{ 6778 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6779 match(Set dst (SubVS src (LoadVector mem))); 6780 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed8S" %} 6781 ins_encode %{ 6782 int vector_len = 0; 6783 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6784 %} 6785 ins_pipe( pipe_slow ); 6786 %} 6787 6788 instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{ 6789 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6790 match(Set dst (SubVS src1 src2)); 6791 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 6792 ins_encode %{ 6793 int vector_len = 1; 6794 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6795 %} 6796 ins_pipe( pipe_slow ); 6797 %} 6798 6799 instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{ 6800 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 6801 match(Set dst (SubVS src (LoadVector mem))); 6802 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 6803 ins_encode %{ 6804 int vector_len = 1; 6805 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6806 %} 6807 ins_pipe( pipe_slow ); 6808 %} 6809 6810 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6811 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6812 match(Set dst (SubVS src1 src2)); 6813 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 6814 ins_encode %{ 6815 int vector_len = 2; 6816 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6817 %} 6818 ins_pipe( pipe_slow ); 6819 %} 6820 6821 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 6822 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 6823 match(Set dst (SubVS src (LoadVector mem))); 6824 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 6825 ins_encode %{ 6826 int vector_len = 2; 6827 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6828 %} 6829 ins_pipe( pipe_slow ); 6830 %} 6831 6832 // Integers vector sub 6833 instruct vsub2I(vecD dst, vecD src) %{ 6834 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6835 match(Set dst (SubVI dst src)); 6836 format %{ "psubd $dst,$src\t! sub packed2I" %} 6837 ins_encode %{ 6838 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6839 %} 6840 ins_pipe( pipe_slow ); 6841 %} 6842 6843 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 6844 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6845 match(Set dst (SubVI src1 src2)); 6846 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 6847 ins_encode %{ 6848 int vector_len = 0; 6849 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6850 %} 6851 ins_pipe( pipe_slow ); 6852 %} 6853 6854 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 6855 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6856 match(Set dst (SubVI src (LoadVector mem))); 6857 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 6858 ins_encode %{ 6859 int vector_len = 0; 6860 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6861 %} 6862 ins_pipe( pipe_slow ); 6863 %} 6864 6865 instruct vsub4I(vecX dst, vecX src) %{ 6866 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6867 match(Set dst (SubVI dst src)); 6868 format %{ "psubd $dst,$src\t! sub packed4I" %} 6869 ins_encode %{ 6870 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6871 %} 6872 ins_pipe( pipe_slow ); 6873 %} 6874 6875 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 6876 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6877 match(Set dst (SubVI src1 src2)); 6878 format %{ "vpsubd $dst,$src1,$src2\t! 
sub packed4I" %} 6879 ins_encode %{ 6880 int vector_len = 0; 6881 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6882 %} 6883 ins_pipe( pipe_slow ); 6884 %} 6885 6886 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 6887 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6888 match(Set dst (SubVI src (LoadVector mem))); 6889 format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %} 6890 ins_encode %{ 6891 int vector_len = 0; 6892 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6893 %} 6894 ins_pipe( pipe_slow ); 6895 %} 6896 6897 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 6898 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6899 match(Set dst (SubVI src1 src2)); 6900 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 6901 ins_encode %{ 6902 int vector_len = 1; 6903 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6904 %} 6905 ins_pipe( pipe_slow ); 6906 %} 6907 6908 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 6909 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6910 match(Set dst (SubVI src (LoadVector mem))); 6911 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 6912 ins_encode %{ 6913 int vector_len = 1; 6914 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6915 %} 6916 ins_pipe( pipe_slow ); 6917 %} 6918 6919 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6920 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6921 match(Set dst (SubVI src1 src2)); 6922 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 6923 ins_encode %{ 6924 int vector_len = 2; 6925 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6926 %} 6927 ins_pipe( pipe_slow ); 6928 %} 6929 6930 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 6931 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6932 match(Set dst (SubVI src (LoadVector mem))); 6933 format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %} 6934 ins_encode %{ 6935 int vector_len = 2; 6936 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6937 %} 6938 ins_pipe( pipe_slow ); 6939 %} 6940 6941 // Longs vector sub 6942 instruct vsub2L(vecX dst, vecX src) %{ 6943 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 6944 match(Set dst (SubVL dst src)); 6945 format %{ "psubq $dst,$src\t! sub packed2L" %} 6946 ins_encode %{ 6947 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 6948 %} 6949 ins_pipe( pipe_slow ); 6950 %} 6951 6952 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 6953 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6954 match(Set dst (SubVL src1 src2)); 6955 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 6956 ins_encode %{ 6957 int vector_len = 0; 6958 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6959 %} 6960 ins_pipe( pipe_slow ); 6961 %} 6962 6963 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 6964 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6965 match(Set dst (SubVL src (LoadVector mem))); 6966 format %{ "vpsubq $dst,$src,$mem\t! 
sub packed2L" %} 6967 ins_encode %{ 6968 int vector_len = 0; 6969 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6970 %} 6971 ins_pipe( pipe_slow ); 6972 %} 6973 6974 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 6975 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6976 match(Set dst (SubVL src1 src2)); 6977 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 6978 ins_encode %{ 6979 int vector_len = 1; 6980 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6981 %} 6982 ins_pipe( pipe_slow ); 6983 %} 6984 6985 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 6986 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6987 match(Set dst (SubVL src (LoadVector mem))); 6988 format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %} 6989 ins_encode %{ 6990 int vector_len = 1; 6991 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6992 %} 6993 ins_pipe( pipe_slow ); 6994 %} 6995 6996 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6997 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6998 match(Set dst (SubVL src1 src2)); 6999 format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} 7000 ins_encode %{ 7001 int vector_len = 2; 7002 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7003 %} 7004 ins_pipe( pipe_slow ); 7005 %} 7006 7007 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 7008 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7009 match(Set dst (SubVL src (LoadVector mem))); 7010 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 7011 ins_encode %{ 7012 int vector_len = 2; 7013 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7014 %} 7015 ins_pipe( pipe_slow ); 7016 %} 7017 7018 // Floats vector sub 7019 instruct vsub2F(vecD dst, vecD src) %{ 7020 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7021 match(Set dst (SubVF dst src)); 7022 format %{ "subps $dst,$src\t! sub packed2F" %} 7023 ins_encode %{ 7024 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7025 %} 7026 ins_pipe( pipe_slow ); 7027 %} 7028 7029 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 7030 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7031 match(Set dst (SubVF src1 src2)); 7032 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 7033 ins_encode %{ 7034 int vector_len = 0; 7035 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7036 %} 7037 ins_pipe( pipe_slow ); 7038 %} 7039 7040 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 7041 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7042 match(Set dst (SubVF src (LoadVector mem))); 7043 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 7044 ins_encode %{ 7045 int vector_len = 0; 7046 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7047 %} 7048 ins_pipe( pipe_slow ); 7049 %} 7050 7051 instruct vsub4F(vecX dst, vecX src) %{ 7052 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7053 match(Set dst (SubVF dst src)); 7054 format %{ "subps $dst,$src\t! sub packed4F" %} 7055 ins_encode %{ 7056 __ subps($dst$$XMMRegister, $src$$XMMRegister); 7057 %} 7058 ins_pipe( pipe_slow ); 7059 %} 7060 7061 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 7062 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7063 match(Set dst (SubVF src1 src2)); 7064 format %{ "vsubps $dst,$src1,$src2\t! 
sub packed4F" %} 7065 ins_encode %{ 7066 int vector_len = 0; 7067 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7068 %} 7069 ins_pipe( pipe_slow ); 7070 %} 7071 7072 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 7073 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7074 match(Set dst (SubVF src (LoadVector mem))); 7075 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 7076 ins_encode %{ 7077 int vector_len = 0; 7078 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7079 %} 7080 ins_pipe( pipe_slow ); 7081 %} 7082 7083 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 7084 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7085 match(Set dst (SubVF src1 src2)); 7086 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 7087 ins_encode %{ 7088 int vector_len = 1; 7089 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7090 %} 7091 ins_pipe( pipe_slow ); 7092 %} 7093 7094 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 7095 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7096 match(Set dst (SubVF src (LoadVector mem))); 7097 format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %} 7098 ins_encode %{ 7099 int vector_len = 1; 7100 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7101 %} 7102 ins_pipe( pipe_slow ); 7103 %} 7104 7105 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7106 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7107 match(Set dst (SubVF src1 src2)); 7108 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 7109 ins_encode %{ 7110 int vector_len = 2; 7111 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7112 %} 7113 ins_pipe( pipe_slow ); 7114 %} 7115 7116 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 7117 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7118 match(Set dst (SubVF src (LoadVector mem))); 7119 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 7120 ins_encode %{ 7121 int vector_len = 2; 7122 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7123 %} 7124 ins_pipe( pipe_slow ); 7125 %} 7126 7127 // Doubles vector sub 7128 instruct vsub2D(vecX dst, vecX src) %{ 7129 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7130 match(Set dst (SubVD dst src)); 7131 format %{ "subpd $dst,$src\t! sub packed2D" %} 7132 ins_encode %{ 7133 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 7134 %} 7135 ins_pipe( pipe_slow ); 7136 %} 7137 7138 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 7139 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7140 match(Set dst (SubVD src1 src2)); 7141 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 7142 ins_encode %{ 7143 int vector_len = 0; 7144 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7145 %} 7146 ins_pipe( pipe_slow ); 7147 %} 7148 7149 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 7150 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7151 match(Set dst (SubVD src (LoadVector mem))); 7152 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed2D" %} 7153 ins_encode %{ 7154 int vector_len = 0; 7155 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7156 %} 7157 ins_pipe( pipe_slow ); 7158 %} 7159 7160 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 7161 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7162 match(Set dst (SubVD src1 src2)); 7163 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 7164 ins_encode %{ 7165 int vector_len = 1; 7166 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7167 %} 7168 ins_pipe( pipe_slow ); 7169 %} 7170 7171 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 7172 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7173 match(Set dst (SubVD src (LoadVector mem))); 7174 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 7175 ins_encode %{ 7176 int vector_len = 1; 7177 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7178 %} 7179 ins_pipe( pipe_slow ); 7180 %} 7181 7182 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7183 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7184 match(Set dst (SubVD src1 src2)); 7185 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 7186 ins_encode %{ 7187 int vector_len = 2; 7188 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7189 %} 7190 ins_pipe( pipe_slow ); 7191 %} 7192 7193 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 7194 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7195 match(Set dst (SubVD src (LoadVector mem))); 7196 format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %} 7197 ins_encode %{ 7198 int vector_len = 2; 7199 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7200 %} 7201 ins_pipe( pipe_slow ); 7202 %} 7203 7204 // --------------------------------- MUL -------------------------------------- 7205 7206 // Byte vector mul 7207 instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp, rRegI scratch) %{ 7208 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7209 match(Set dst (MulVB src1 src2)); 7210 effect(TEMP dst, TEMP tmp, TEMP scratch); 7211 format %{"pmovsxbw $tmp,$src1\n\t" 7212 "pmovsxbw $dst,$src2\n\t" 7213 "pmullw $tmp,$dst\n\t" 7214 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7215 "pand $dst,$tmp\n\t" 7216 "packuswb $dst,$dst\t! mul packed4B" %} 7217 ins_encode %{ 7218 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 7219 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 7220 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 7221 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7222 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 7223 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 7224 %} 7225 ins_pipe( pipe_slow ); 7226 %} 7227 7228 instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp, rRegI scratch) %{ 7229 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); 7230 match(Set dst (MulVB src1 src2)); 7231 effect(TEMP dst, TEMP tmp, TEMP scratch); 7232 format %{"pmovsxbw $tmp,$src1\n\t" 7233 "pmovsxbw $dst,$src2\n\t" 7234 "pmullw $tmp,$dst\n\t" 7235 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7236 "pand $dst,$tmp\n\t" 7237 "packuswb $dst,$dst\t! 
mul packed8B" %} 7238 ins_encode %{ 7239 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 7240 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 7241 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 7242 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7243 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 7244 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 7245 %} 7246 ins_pipe( pipe_slow ); 7247 %} 7248 7249 instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp2, rRegI scratch) %{ 7250 predicate(UseSSE > 3 && n->as_Vector()->length() == 16); 7251 match(Set dst (MulVB src1 src2)); 7252 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7253 format %{"pmovsxbw $tmp1,$src1\n\t" 7254 "pmovsxbw $tmp2,$src2\n\t" 7255 "pmullw $tmp1,$tmp2\n\t" 7256 "pshufd $tmp2,$src1,0xEE\n\t" 7257 "pshufd $dst,$src2,0xEE\n\t" 7258 "pmovsxbw $tmp2,$tmp2\n\t" 7259 "pmovsxbw $dst,$dst\n\t" 7260 "pmullw $tmp2,$dst\n\t" 7261 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7262 "pand $tmp2,$dst\n\t" 7263 "pand $dst,$tmp1\n\t" 7264 "packuswb $dst,$tmp2\t! mul packed16B" %} 7265 ins_encode %{ 7266 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 7267 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 7268 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 7269 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 7270 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 7271 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 7272 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 7273 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 7274 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7275 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 7276 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 7277 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 7278 %} 7279 ins_pipe( pipe_slow ); 7280 %} 7281 7282 instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp, rRegI scratch) %{ 7283 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7284 match(Set dst (MulVB src1 src2)); 7285 effect(TEMP dst, TEMP tmp, TEMP scratch); 7286 format %{"vpmovsxbw $tmp,$src1\n\t" 7287 "vpmovsxbw $dst,$src2\n\t" 7288 "vpmullw $tmp,$tmp,$dst\n\t" 7289 "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 7290 "vpand $dst,$dst,$tmp\n\t" 7291 "vextracti128_high $tmp,$dst\n\t" 7292 "vpackuswb $dst,$dst,$tmp\n\t!
mul packed16B" %} 7293 ins_encode %{ 7294 int vector_len = 1; 7295 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 7296 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7297 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len); 7298 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7299 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 7300 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 7301 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 7302 %} 7303 ins_pipe( pipe_slow ); 7304 %} 7305 7306 instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, rRegI scratch) %{ 7307 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 7308 match(Set dst (MulVB src1 src2)); 7309 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7310 format %{"vextracti128_high $tmp1,$src1\n\t" 7311 "vextracti128_high $dst,$src2\n\t" 7312 "vpmovsxbw $tmp1,$tmp1\n\t" 7313 "vpmovsxbw $dst,$dst\n\t" 7314 "vpmullw $tmp1,$tmp1,$dst\n\t" 7315 "vpmovsxbw $tmp2,$src1\n\t" 7316 "vpmovsxbw $dst,$src2\n\t" 7317 "vpmullw $tmp2,$tmp2,$dst\n\t" 7318 "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" 7319 "vpbroadcastd $dst, $dst\n\t" 7320 "vpand $tmp1,$tmp1,$dst\n\t" 7321 "vpand $dst,$dst,$tmp2\n\t" 7322 "vpackuswb $dst,$dst,$tmp1\n\t" 7323 "vpermq $dst, $dst, 0xD8\t! mul packed32B" %} 7324 ins_encode %{ 7325 int vector_len = 1; 7326 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 7327 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 7328 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7329 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7330 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7331 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 7332 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7333 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7334 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7335 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7336 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7337 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7338 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7339 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 7340 %} 7341 ins_pipe( pipe_slow ); 7342 %} 7343 7344 instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ 7345 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 7346 match(Set dst (MulVB src1 src2)); 7347 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 7348 format %{"vextracti64x4_high $tmp1,$src1\n\t" 7349 "vextracti64x4_high $dst,$src2\n\t" 7350 "vpmovsxbw $tmp1,$tmp1\n\t" 7351 "vpmovsxbw $dst,$dst\n\t" 7352 "vpmullw $tmp1,$tmp1,$dst\n\t" 7353 "vpmovsxbw $tmp2,$src1\n\t" 7354 "vpmovsxbw $dst,$src2\n\t" 7355 "vpmullw $tmp2,$tmp2,$dst\n\t" 7356 "vmovdqu $dst, [0x00ff00ff0x00ff00ff]\n\t" 7357 "vpbroadcastd $dst, $dst\n\t" 7358 "vpand $tmp1,$tmp1,$dst\n\t" 7359 "vpand $tmp2,$tmp2,$dst\n\t" 7360 "vpackuswb $dst,$tmp1,$tmp2\n\t" 7361 "evmovdquq $tmp2,[0x0604020007050301]\n\t" 7362 "vpermq $dst,$tmp2,$dst,0x01\t! 
mul packed64B" %} 7363 7364 ins_encode %{ 7365 int vector_len = 2; 7366 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 7367 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 7368 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7369 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7370 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7371 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 7372 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len); 7373 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7374 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 7375 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 7376 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 7377 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7378 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7379 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 7380 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 7381 7382 %} 7383 ins_pipe( pipe_slow ); 7384 %} 7385 7386 // Shorts/Chars vector mul 7387 instruct vmul2S(vecS dst, vecS src) %{ 7388 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7389 match(Set dst (MulVS dst src)); 7390 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7391 ins_encode %{ 7392 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7393 %} 7394 ins_pipe( pipe_slow ); 7395 %} 7396 7397 instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{ 7398 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7399 match(Set dst (MulVS src1 src2)); 7400 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7401 ins_encode %{ 7402 int vector_len = 0; 7403 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7404 %} 7405 ins_pipe( pipe_slow ); 7406 %} 7407 7408 instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ 7409 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7410 match(Set dst (MulVS src (LoadVector mem))); 7411 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7412 ins_encode %{ 7413 int vector_len = 0; 7414 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7415 %} 7416 ins_pipe( pipe_slow ); 7417 %} 7418 7419 instruct vmul4S(vecD dst, vecD src) %{ 7420 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7421 match(Set dst (MulVS dst src)); 7422 format %{ "pmullw $dst,$src\t! mul packed4S" %} 7423 ins_encode %{ 7424 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7425 %} 7426 ins_pipe( pipe_slow ); 7427 %} 7428 7429 instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ 7430 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7431 match(Set dst (MulVS src1 src2)); 7432 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 7433 ins_encode %{ 7434 int vector_len = 0; 7435 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7436 %} 7437 ins_pipe( pipe_slow ); 7438 %} 7439 7440 instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ 7441 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7442 match(Set dst (MulVS src (LoadVector mem))); 7443 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} 7444 ins_encode %{ 7445 int vector_len = 0; 7446 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7447 %} 7448 ins_pipe( pipe_slow ); 7449 %} 7450 7451 instruct vmul8S(vecX dst, vecX src) %{ 7452 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 7453 match(Set dst (MulVS dst src)); 7454 format %{ "pmullw $dst,$src\t! mul packed8S" %} 7455 ins_encode %{ 7456 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7457 %} 7458 ins_pipe( pipe_slow ); 7459 %} 7460 7461 instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ 7462 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7463 match(Set dst (MulVS src1 src2)); 7464 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 7465 ins_encode %{ 7466 int vector_len = 0; 7467 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7468 %} 7469 ins_pipe( pipe_slow ); 7470 %} 7471 7472 instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{ 7473 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7474 match(Set dst (MulVS src (LoadVector mem))); 7475 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 7476 ins_encode %{ 7477 int vector_len = 0; 7478 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7479 %} 7480 ins_pipe( pipe_slow ); 7481 %} 7482 7483 instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{ 7484 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7485 match(Set dst (MulVS src1 src2)); 7486 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 7487 ins_encode %{ 7488 int vector_len = 1; 7489 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7490 %} 7491 ins_pipe( pipe_slow ); 7492 %} 7493 7494 instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{ 7495 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 7496 match(Set dst (MulVS src (LoadVector mem))); 7497 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 7498 ins_encode %{ 7499 int vector_len = 1; 7500 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7501 %} 7502 ins_pipe( pipe_slow ); 7503 %} 7504 7505 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7506 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7507 match(Set dst (MulVS src1 src2)); 7508 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 7509 ins_encode %{ 7510 int vector_len = 2; 7511 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7512 %} 7513 ins_pipe( pipe_slow ); 7514 %} 7515 7516 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 7517 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7518 match(Set dst (MulVS src (LoadVector mem))); 7519 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 7520 ins_encode %{ 7521 int vector_len = 2; 7522 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7523 %} 7524 ins_pipe( pipe_slow ); 7525 %} 7526 7527 // Integers vector mul (sse4_1) 7528 instruct vmul2I(vecD dst, vecD src) %{ 7529 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 7530 match(Set dst (MulVI dst src)); 7531 format %{ "pmulld $dst,$src\t! 
mul packed2I" %} 7532 ins_encode %{ 7533 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7534 %} 7535 ins_pipe( pipe_slow ); 7536 %} 7537 7538 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 7539 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7540 match(Set dst (MulVI src1 src2)); 7541 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 7542 ins_encode %{ 7543 int vector_len = 0; 7544 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7545 %} 7546 ins_pipe( pipe_slow ); 7547 %} 7548 7549 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 7550 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7551 match(Set dst (MulVI src (LoadVector mem))); 7552 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 7553 ins_encode %{ 7554 int vector_len = 0; 7555 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7556 %} 7557 ins_pipe( pipe_slow ); 7558 %} 7559 7560 instruct vmul4I(vecX dst, vecX src) %{ 7561 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 7562 match(Set dst (MulVI dst src)); 7563 format %{ "pmulld $dst,$src\t! mul packed4I" %} 7564 ins_encode %{ 7565 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 7566 %} 7567 ins_pipe( pipe_slow ); 7568 %} 7569 7570 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 7571 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7572 match(Set dst (MulVI src1 src2)); 7573 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 7574 ins_encode %{ 7575 int vector_len = 0; 7576 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7577 %} 7578 ins_pipe( pipe_slow ); 7579 %} 7580 7581 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 7582 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7583 match(Set dst (MulVI src (LoadVector mem))); 7584 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 7585 ins_encode %{ 7586 int vector_len = 0; 7587 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7588 %} 7589 ins_pipe( pipe_slow ); 7590 %} 7591 7592 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 7593 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7594 match(Set dst (MulVL src1 src2)); 7595 format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %} 7596 ins_encode %{ 7597 int vector_len = 0; 7598 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7599 %} 7600 ins_pipe( pipe_slow ); 7601 %} 7602 7603 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 7604 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 7605 match(Set dst (MulVL src (LoadVector mem))); 7606 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 7607 ins_encode %{ 7608 int vector_len = 0; 7609 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7610 %} 7611 ins_pipe( pipe_slow ); 7612 %} 7613 7614 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 7615 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7616 match(Set dst (MulVL src1 src2)); 7617 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed4L" %} 7618 ins_encode %{ 7619 int vector_len = 1; 7620 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7621 %} 7622 ins_pipe( pipe_slow ); 7623 %} 7624 7625 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 7626 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 7627 match(Set dst (MulVL src (LoadVector mem))); 7628 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 7629 ins_encode %{ 7630 int vector_len = 1; 7631 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7632 %} 7633 ins_pipe( pipe_slow ); 7634 %} 7635 7636 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7637 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7638 match(Set dst (MulVL src1 src2)); 7639 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 7640 ins_encode %{ 7641 int vector_len = 2; 7642 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7643 %} 7644 ins_pipe( pipe_slow ); 7645 %} 7646 7647 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 7648 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 7649 match(Set dst (MulVL src (LoadVector mem))); 7650 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 7651 ins_encode %{ 7652 int vector_len = 2; 7653 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7654 %} 7655 ins_pipe( pipe_slow ); 7656 %} 7657 7658 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 7659 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7660 match(Set dst (MulVI src1 src2)); 7661 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 7662 ins_encode %{ 7663 int vector_len = 1; 7664 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7665 %} 7666 ins_pipe( pipe_slow ); 7667 %} 7668 7669 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 7670 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 7671 match(Set dst (MulVI src (LoadVector mem))); 7672 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 7673 ins_encode %{ 7674 int vector_len = 1; 7675 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7676 %} 7677 ins_pipe( pipe_slow ); 7678 %} 7679 7680 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7681 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7682 match(Set dst (MulVI src1 src2)); 7683 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 7684 ins_encode %{ 7685 int vector_len = 2; 7686 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7687 %} 7688 ins_pipe( pipe_slow ); 7689 %} 7690 7691 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 7692 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7693 match(Set dst (MulVI src (LoadVector mem))); 7694 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 7695 ins_encode %{ 7696 int vector_len = 2; 7697 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7698 %} 7699 ins_pipe( pipe_slow ); 7700 %} 7701 7702 // Floats vector mul 7703 instruct vmul2F(vecD dst, vecD src) %{ 7704 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7705 match(Set dst (MulVF dst src)); 7706 format %{ "mulps $dst,$src\t! 
mul packed2F" %} 7707 ins_encode %{ 7708 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7709 %} 7710 ins_pipe( pipe_slow ); 7711 %} 7712 7713 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 7714 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7715 match(Set dst (MulVF src1 src2)); 7716 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 7717 ins_encode %{ 7718 int vector_len = 0; 7719 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7720 %} 7721 ins_pipe( pipe_slow ); 7722 %} 7723 7724 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 7725 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7726 match(Set dst (MulVF src (LoadVector mem))); 7727 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 7728 ins_encode %{ 7729 int vector_len = 0; 7730 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7731 %} 7732 ins_pipe( pipe_slow ); 7733 %} 7734 7735 instruct vmul4F(vecX dst, vecX src) %{ 7736 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7737 match(Set dst (MulVF dst src)); 7738 format %{ "mulps $dst,$src\t! mul packed4F" %} 7739 ins_encode %{ 7740 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 7741 %} 7742 ins_pipe( pipe_slow ); 7743 %} 7744 7745 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 7746 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7747 match(Set dst (MulVF src1 src2)); 7748 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 7749 ins_encode %{ 7750 int vector_len = 0; 7751 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7752 %} 7753 ins_pipe( pipe_slow ); 7754 %} 7755 7756 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 7757 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7758 match(Set dst (MulVF src (LoadVector mem))); 7759 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 7760 ins_encode %{ 7761 int vector_len = 0; 7762 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7763 %} 7764 ins_pipe( pipe_slow ); 7765 %} 7766 7767 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 7768 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7769 match(Set dst (MulVF src1 src2)); 7770 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 7771 ins_encode %{ 7772 int vector_len = 1; 7773 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7774 %} 7775 ins_pipe( pipe_slow ); 7776 %} 7777 7778 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 7779 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7780 match(Set dst (MulVF src (LoadVector mem))); 7781 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 7782 ins_encode %{ 7783 int vector_len = 1; 7784 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7785 %} 7786 ins_pipe( pipe_slow ); 7787 %} 7788 7789 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7790 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7791 match(Set dst (MulVF src1 src2)); 7792 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 7793 ins_encode %{ 7794 int vector_len = 2; 7795 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7796 %} 7797 ins_pipe( pipe_slow ); 7798 %} 7799 7800 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 7801 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 7802 match(Set dst (MulVF src (LoadVector mem))); 7803 format %{ "vmulps $dst,$src,$mem\t! 
mul packed16F" %} 7804 ins_encode %{ 7805 int vector_len = 2; 7806 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7807 %} 7808 ins_pipe( pipe_slow ); 7809 %} 7810 7811 // Doubles vector mul 7812 instruct vmul2D(vecX dst, vecX src) %{ 7813 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7814 match(Set dst (MulVD dst src)); 7815 format %{ "mulpd $dst,$src\t! mul packed2D" %} 7816 ins_encode %{ 7817 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 7818 %} 7819 ins_pipe( pipe_slow ); 7820 %} 7821 7822 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 7823 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7824 match(Set dst (MulVD src1 src2)); 7825 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 7826 ins_encode %{ 7827 int vector_len = 0; 7828 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7829 %} 7830 ins_pipe( pipe_slow ); 7831 %} 7832 7833 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 7834 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7835 match(Set dst (MulVD src (LoadVector mem))); 7836 format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %} 7837 ins_encode %{ 7838 int vector_len = 0; 7839 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7840 %} 7841 ins_pipe( pipe_slow ); 7842 %} 7843 7844 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 7845 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7846 match(Set dst (MulVD src1 src2)); 7847 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 7848 ins_encode %{ 7849 int vector_len = 1; 7850 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7851 %} 7852 ins_pipe( pipe_slow ); 7853 %} 7854 7855 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 7856 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7857 match(Set dst (MulVD src (LoadVector mem))); 7858 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 7859 ins_encode %{ 7860 int vector_len = 1; 7861 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7862 %} 7863 ins_pipe( pipe_slow ); 7864 %} 7865 7866 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7867 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7868 match(Set dst (MulVD src1 src2)); 7869 format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %} 7870 ins_encode %{ 7871 int vector_len = 2; 7872 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7873 %} 7874 ins_pipe( pipe_slow ); 7875 %} 7876 7877 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 7878 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 7879 match(Set dst (MulVD src (LoadVector mem))); 7880 format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %} 7881 ins_encode %{ 7882 int vector_len = 2; 7883 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7884 %} 7885 ins_pipe( pipe_slow ); 7886 %} 7887 7888 instruct vcmov8F_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 7889 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7890 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 7891 effect(TEMP dst, USE src1, USE src2); 7892 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 7893 "blendvps $dst,$src1,$src2,$dst !
vcmovevf\n\t" 7894 %} 7895 ins_encode %{ 7896 int vector_len = 1; 7897 int cond = (Assembler::Condition)($copnd$$cmpcode); 7898 __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 7899 __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 7900 %} 7901 ins_pipe( pipe_slow ); 7902 %} 7903 7904 instruct vcmov4D_reg(legVecY dst, legVecY src1, legVecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 7905 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7906 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 7907 effect(TEMP dst, USE src1, USE src2); 7908 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 7909 "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 7910 %} 7911 ins_encode %{ 7912 int vector_len = 1; 7913 int cond = (Assembler::Condition)($copnd$$cmpcode); 7914 __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 7915 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 7916 %} 7917 ins_pipe( pipe_slow ); 7918 %} 7919 7920 // --------------------------------- DIV -------------------------------------- 7921 7922 // Floats vector div 7923 instruct vdiv2F(vecD dst, vecD src) %{ 7924 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7925 match(Set dst (DivVF dst src)); 7926 format %{ "divps $dst,$src\t! div packed2F" %} 7927 ins_encode %{ 7928 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7929 %} 7930 ins_pipe( pipe_slow ); 7931 %} 7932 7933 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 7934 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7935 match(Set dst (DivVF src1 src2)); 7936 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 7937 ins_encode %{ 7938 int vector_len = 0; 7939 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7940 %} 7941 ins_pipe( pipe_slow ); 7942 %} 7943 7944 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 7945 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 7946 match(Set dst (DivVF src (LoadVector mem))); 7947 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 7948 ins_encode %{ 7949 int vector_len = 0; 7950 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7951 %} 7952 ins_pipe( pipe_slow ); 7953 %} 7954 7955 instruct vdiv4F(vecX dst, vecX src) %{ 7956 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7957 match(Set dst (DivVF dst src)); 7958 format %{ "divps $dst,$src\t! div packed4F" %} 7959 ins_encode %{ 7960 __ divps($dst$$XMMRegister, $src$$XMMRegister); 7961 %} 7962 ins_pipe( pipe_slow ); 7963 %} 7964 7965 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 7966 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7967 match(Set dst (DivVF src1 src2)); 7968 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 7969 ins_encode %{ 7970 int vector_len = 0; 7971 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7972 %} 7973 ins_pipe( pipe_slow ); 7974 %} 7975 7976 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 7977 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 7978 match(Set dst (DivVF src (LoadVector mem))); 7979 format %{ "vdivps $dst,$src,$mem\t! 
div packed4F" %} 7980 ins_encode %{ 7981 int vector_len = 0; 7982 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7983 %} 7984 ins_pipe( pipe_slow ); 7985 %} 7986 7987 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 7988 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 7989 match(Set dst (DivVF src1 src2)); 7990 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 7991 ins_encode %{ 7992 int vector_len = 1; 7993 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7994 %} 7995 ins_pipe( pipe_slow ); 7996 %} 7997 7998 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 7999 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8000 match(Set dst (DivVF src (LoadVector mem))); 8001 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 8002 ins_encode %{ 8003 int vector_len = 1; 8004 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8005 %} 8006 ins_pipe( pipe_slow ); 8007 %} 8008 8009 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8010 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8011 match(Set dst (DivVF src1 src2)); 8012 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 8013 ins_encode %{ 8014 int vector_len = 2; 8015 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8016 %} 8017 ins_pipe( pipe_slow ); 8018 %} 8019 8020 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 8021 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8022 match(Set dst (DivVF src (LoadVector mem))); 8023 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 8024 ins_encode %{ 8025 int vector_len = 2; 8026 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8027 %} 8028 ins_pipe( pipe_slow ); 8029 %} 8030 8031 // Doubles vector div 8032 instruct vdiv2D(vecX dst, vecX src) %{ 8033 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 8034 match(Set dst (DivVD dst src)); 8035 format %{ "divpd $dst,$src\t! div packed2D" %} 8036 ins_encode %{ 8037 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 8038 %} 8039 ins_pipe( pipe_slow ); 8040 %} 8041 8042 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 8043 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8044 match(Set dst (DivVD src1 src2)); 8045 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 8046 ins_encode %{ 8047 int vector_len = 0; 8048 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8049 %} 8050 ins_pipe( pipe_slow ); 8051 %} 8052 8053 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 8054 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8055 match(Set dst (DivVD src (LoadVector mem))); 8056 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 8057 ins_encode %{ 8058 int vector_len = 0; 8059 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8060 %} 8061 ins_pipe( pipe_slow ); 8062 %} 8063 8064 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 8065 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8066 match(Set dst (DivVD src1 src2)); 8067 format %{ "vdivpd $dst,$src1,$src2\t!
div packed4D" %} 8068 ins_encode %{ 8069 int vector_len = 1; 8070 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8071 %} 8072 ins_pipe( pipe_slow ); 8073 %} 8074 8075 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 8076 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8077 match(Set dst (DivVD src (LoadVector mem))); 8078 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 8079 ins_encode %{ 8080 int vector_len = 1; 8081 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8082 %} 8083 ins_pipe( pipe_slow ); 8084 %} 8085 8086 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8087 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8088 match(Set dst (DivVD src1 src2)); 8089 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 8090 ins_encode %{ 8091 int vector_len = 2; 8092 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8093 %} 8094 ins_pipe( pipe_slow ); 8095 %} 8096 8097 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 8098 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8099 match(Set dst (DivVD src (LoadVector mem))); 8100 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 8101 ins_encode %{ 8102 int vector_len = 2; 8103 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8104 %} 8105 ins_pipe( pipe_slow ); 8106 %} 8107 8108 // --------------------------------- Sqrt -------------------------------------- 8109 8110 // Floating point vector sqrt 8111 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 8112 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8113 match(Set dst (SqrtVD src)); 8114 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 8115 ins_encode %{ 8116 int vector_len = 0; 8117 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8118 %} 8119 ins_pipe( pipe_slow ); 8120 %} 8121 8122 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8123 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8124 match(Set dst (SqrtVD (LoadVector mem))); 8125 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8126 ins_encode %{ 8127 int vector_len = 0; 8128 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8129 %} 8130 ins_pipe( pipe_slow ); 8131 %} 8132 8133 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 8134 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8135 match(Set dst (SqrtVD src)); 8136 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 8137 ins_encode %{ 8138 int vector_len = 1; 8139 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8140 %} 8141 ins_pipe( pipe_slow ); 8142 %} 8143 8144 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 8145 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8146 match(Set dst (SqrtVD (LoadVector mem))); 8147 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 8148 ins_encode %{ 8149 int vector_len = 1; 8150 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8151 %} 8152 ins_pipe( pipe_slow ); 8153 %} 8154 8155 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 8156 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8157 match(Set dst (SqrtVD src)); 8158 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 8159 ins_encode %{ 8160 int vector_len = 2; 8161 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8162 %} 8163 ins_pipe( pipe_slow ); 8164 %} 8165 8166 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 8167 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8168 match(Set dst (SqrtVD (LoadVector mem))); 8169 format %{ "vsqrtpd $dst,$mem\t! 
sqrt packed8D" %} 8170 ins_encode %{ 8171 int vector_len = 2; 8172 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8173 %} 8174 ins_pipe( pipe_slow ); 8175 %} 8176 8177 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 8178 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8179 match(Set dst (SqrtVF src)); 8180 format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %} 8181 ins_encode %{ 8182 int vector_len = 0; 8183 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8184 %} 8185 ins_pipe( pipe_slow ); 8186 %} 8187 8188 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 8189 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8190 match(Set dst (SqrtVF (LoadVector mem))); 8191 format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} 8192 ins_encode %{ 8193 int vector_len = 0; 8194 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8195 %} 8196 ins_pipe( pipe_slow ); 8197 %} 8198 8199 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 8200 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8201 match(Set dst (SqrtVF src)); 8202 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 8203 ins_encode %{ 8204 int vector_len = 0; 8205 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8206 %} 8207 ins_pipe( pipe_slow ); 8208 %} 8209 8210 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 8211 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8212 match(Set dst (SqrtVF (LoadVector mem))); 8213 format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} 8214 ins_encode %{ 8215 int vector_len = 0; 8216 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8217 %} 8218 ins_pipe( pipe_slow ); 8219 %} 8220 8221 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 8222 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8223 match(Set dst (SqrtVF src)); 8224 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 8225 ins_encode %{ 8226 int vector_len = 1; 8227 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8228 %} 8229 ins_pipe( pipe_slow ); 8230 %} 8231 8232 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 8233 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8234 match(Set dst (SqrtVF (LoadVector mem))); 8235 format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %} 8236 ins_encode %{ 8237 int vector_len = 1; 8238 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8239 %} 8240 ins_pipe( pipe_slow ); 8241 %} 8242 8243 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 8244 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8245 match(Set dst (SqrtVF src)); 8246 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 8247 ins_encode %{ 8248 int vector_len = 2; 8249 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8250 %} 8251 ins_pipe( pipe_slow ); 8252 %} 8253 8254 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 8255 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8256 match(Set dst (SqrtVF (LoadVector mem))); 8257 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 8258 ins_encode %{ 8259 int vector_len = 2; 8260 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 8261 %} 8262 ins_pipe( pipe_slow ); 8263 %} 8264 8265 // ------------------------------ Shift --------------------------------------- 8266 8267 // Left and right shift count vectors are the same on x86 8268 // (only lowest bits of xmm reg are used for count). 8269 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 8270 match(Set dst (LShiftCntV cnt)); 8271 match(Set dst (RShiftCntV cnt)); 8272 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 8273 ins_encode %{ 8274 __ movdl($dst$$XMMRegister, $cnt$$Register); 8275 %} 8276 ins_pipe( pipe_slow ); 8277 %} 8278 8279 instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{ 8280 match(Set dst cnt); 8281 effect(TEMP tmp); 8282 format %{ "movl $tmp,$cnt\t" 8283 "movdl $dst,$tmp\t! load shift count" %} 8284 ins_encode %{ 8285 __ movl($tmp$$Register, $cnt$$constant); 8286 __ movdl($dst$$XMMRegister, $tmp$$Register); 8287 %} 8288 ins_pipe( pipe_slow ); 8289 %} 8290 8291 // Byte vector shift 8292 instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{ 8293 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 8294 match(Set dst (LShiftVB src shift)); 8295 match(Set dst (RShiftVB src shift)); 8296 match(Set dst (URShiftVB src shift)); 8297 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 8298 format %{"vextendbw $tmp,$src\n\t" 8299 "vshiftw $tmp,$shift\n\t" 8300 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8301 "pand $dst,$tmp\n\t" 8302 "packuswb $dst,$dst\n\t ! packed4B shift" %} 8303 ins_encode %{ 8304 int opcode = this->as_Mach()->ideal_Opcode(); 8305 8306 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister); 8307 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 8308 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 8309 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 8310 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 8311 %} 8312 ins_pipe( pipe_slow ); 8313 %} 8314 8315 instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{ 8316 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); 8317 match(Set dst (LShiftVB src shift)); 8318 match(Set dst (RShiftVB src shift)); 8319 match(Set dst (URShiftVB src shift)); 8320 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 8321 format %{"vextendbw $tmp,$src\n\t" 8322 "vshiftw $tmp,$shift\n\t" 8323 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8324 "pand $dst,$tmp\n\t" 8325 "packuswb $dst,$dst\n\t ! packed8B shift" %} 8326 ins_encode %{ 8327 int opcode = this->as_Mach()->ideal_Opcode(); 8328 8329 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister); 8330 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 8331 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 8332 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 8333 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 8334 %} 8335 ins_pipe( pipe_slow ); 8336 %} 8337 8338 instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{ 8339 predicate(UseSSE > 3 && UseAVX <= 1 && n->as_Vector()->length() == 16); 8340 match(Set dst (LShiftVB src shift)); 8341 match(Set dst (RShiftVB src shift)); 8342 match(Set dst (URShiftVB src shift)); 8343 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 8344 format %{"vextendbw $tmp1,$src\n\t" 8345 "vshiftw $tmp1,$shift\n\t" 8346 "pshufd $tmp2,$src\n\t" 8347 "vextendbw $tmp2,$tmp2\n\t" 8348 "vshiftw $tmp2,$shift\n\t" 8349 "movdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8350 "pand $tmp2,$dst\n\t" 8351 "pand $dst,$tmp1\n\t" 8352 "packuswb $dst,$tmp2\n\t! 
packed16B shift" %} 8353 ins_encode %{ 8354 int opcode = this->as_Mach()->ideal_Opcode(); 8355 8356 __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister); 8357 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 8358 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 8359 __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 8360 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 8361 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 8362 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 8363 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 8364 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 8365 %} 8366 ins_pipe( pipe_slow ); 8367 %} 8368 8369 instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{ 8370 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8371 match(Set dst (LShiftVB src shift)); 8372 match(Set dst (RShiftVB src shift)); 8373 match(Set dst (URShiftVB src shift)); 8374 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 8375 format %{"vextendbw $tmp,$src\n\t" 8376 "vshiftw $tmp,$tmp,$shift\n\t" 8377 "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" 8378 "vextracti128_high $dst,$tmp\n\t" 8379 "vpackuswb $dst,$tmp,$dst\n\t! packed16B shift" %} 8380 ins_encode %{ 8381 int opcode = this->as_Mach()->ideal_Opcode(); 8382 8383 int vector_len = 1; 8384 __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 8385 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 8386 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 8387 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 8388 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 8389 %} 8390 ins_pipe( pipe_slow ); 8391 %} 8392 8393 instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{ 8394 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 8395 match(Set dst (LShiftVB src shift)); 8396 match(Set dst (RShiftVB src shift)); 8397 match(Set dst (URShiftVB src shift)); 8398 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 8399 format %{"vextracti128_high $tmp,$src\n\t" 8400 "vextendbw $tmp,$tmp\n\t" 8401 "vextendbw $dst,$src\n\t" 8402 "vshiftw $tmp,$tmp,$shift\n\t" 8403 "vshiftw $dst,$dst,$shift\n\t" 8404 "vpand $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t" 8405 "vpand $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t" 8406 "vpackuswb $dst,$dst,$tmp\n\t" 8407 "vpermq $dst,$dst,0xD8\n\t! 
packed32B shift" %} 8408 ins_encode %{ 8409 int opcode = this->as_Mach()->ideal_Opcode(); 8410 8411 int vector_len = 1; 8412 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 8413 __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 8414 __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len); 8415 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 8416 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len); 8417 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 8418 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 8419 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8420 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 8421 %} 8422 ins_pipe( pipe_slow ); 8423 %} 8424 8425 instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{ 8426 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 8427 match(Set dst (LShiftVB src shift)); 8428 match(Set dst (RShiftVB src shift)); 8429 match(Set dst (URShiftVB src shift)); 8430 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 8431 format %{"vextracti64x4 $tmp1,$src\n\t" 8432 "vextendbw $tmp1,$tmp1\n\t" 8433 "vextendbw $tmp2,$src\n\t" 8434 "vshiftw $tmp1,$tmp1,$shift\n\t" 8435 "vshiftw $tmp2,$tmp2,$shift\n\t" 8436 "vmovdqu $dst,[0x00ff00ff0x00ff00ff]\n\t" 8437 "vpbroadcastd $dst,$dst\n\t" 8438 "vpand $tmp1,$tmp1,$dst\n\t" 8439 "vpand $tmp2,$tmp2,$dst\n\t" 8440 "vpackuswb $dst,$tmp1,$tmp2\n\t" 8441 "evmovdquq $tmp2, [0x0604020007050301]\n\t" 8442 "vpermq $dst,$tmp2,$dst\n\t! packed64B shift" %} 8443 ins_encode %{ 8444 int opcode = this->as_Mach()->ideal_Opcode(); 8445 8446 int vector_len = 2; 8447 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 8448 __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 8449 __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len); 8450 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len); 8451 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len); 8452 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 8453 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 8454 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len); 8455 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 8456 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8457 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register); 8458 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len); 8459 %} 8460 ins_pipe( pipe_slow ); 8461 %} 8462 8463 // Shorts vector logical right shift produces an incorrect Java result 8464 // for negative data because Java code converts a short value to an int with 8465 // sign extension before the shift. But char vectors are fine since chars are 8466 // unsigned values.
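// The mismatch described above, shown as a stand-alone scalar sketch in
// ordinary C++ (illustration only, not part of this AD file; it assumes
// Java's promotion of short to int before a shift):
#include <cstdint>
#include <cstdio>

// What Java computes for "short >>> k": sign-extend to int, shift logically,
// then keep the low 16 bits that a short vector lane would hold.
static uint16_t java_short_urshift_lane(int16_t s, int k) {
  return (uint16_t)(((uint32_t)(int32_t)s) >> k);
}

// What a packed 16-bit logical shift (psrlw) computes per lane.
static uint16_t psrlw_lane(uint16_t s, int k) {
  return (uint16_t)(s >> k);
}

int main() {
  int16_t s = (int16_t)0xFF00;                              // a negative short
  printf("java:  0x%04x\n", java_short_urshift_lane(s, 4)); // 0xfff0
  printf("psrlw: 0x%04x\n", psrlw_lane((uint16_t)s, 4));    // 0x0ff0 -- differs
  // For char data the promotion is a zero extension, so both computations
  // agree, which is why URShiftVS is still matched below for char vectors.
  return 0;
}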
8467 // Shorts/Chars vector shift 8468 instruct vshift2S(vecS dst, vecS src, vecS shift) %{ 8469 predicate(n->as_Vector()->length() == 2); 8470 match(Set dst (LShiftVS src shift)); 8471 match(Set dst (RShiftVS src shift)); 8472 match(Set dst (URShiftVS src shift)); 8473 effect(TEMP dst, USE src, USE shift); 8474 format %{ "vshiftw $dst,$src,$shift\t! shift packed2S" %} 8475 ins_encode %{ 8476 int opcode = this->as_Mach()->ideal_Opcode(); 8477 if (UseAVX == 0) { 8478 if ($dst$$XMMRegister != $src$$XMMRegister) 8479 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 8480 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8481 } else { 8482 int vector_len = 0; 8483 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8484 } 8485 %} 8486 ins_pipe( pipe_slow ); 8487 %} 8488 8489 instruct vshift4S(vecD dst, vecD src, vecS shift) %{ 8490 predicate(n->as_Vector()->length() == 4); 8491 match(Set dst (LShiftVS src shift)); 8492 match(Set dst (RShiftVS src shift)); 8493 match(Set dst (URShiftVS src shift)); 8494 effect(TEMP dst, USE src, USE shift); 8495 format %{ "vshiftw $dst,$src,$shift\t! shift packed4S" %} 8496 ins_encode %{ 8497 int opcode = this->as_Mach()->ideal_Opcode(); 8498 if (UseAVX == 0) { 8499 if ($dst$$XMMRegister != $src$$XMMRegister) 8500 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 8501 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8502 8503 } else { 8504 int vector_len = 0; 8505 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8506 } 8507 %} 8508 ins_pipe( pipe_slow ); 8509 %} 8510 8511 instruct vshift8S(vecX dst, vecX src, vecS shift) %{ 8512 predicate(n->as_Vector()->length() == 8); 8513 match(Set dst (LShiftVS src shift)); 8514 match(Set dst (RShiftVS src shift)); 8515 match(Set dst (URShiftVS src shift)); 8516 effect(TEMP dst, USE src, USE shift); 8517 format %{ "vshiftw $dst,$src,$shift\t! shift packed8S" %} 8518 ins_encode %{ 8519 int opcode = this->as_Mach()->ideal_Opcode(); 8520 if (UseAVX == 0) { 8521 if ($dst$$XMMRegister != $src$$XMMRegister) 8522 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 8523 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8524 } else { 8525 int vector_len = 0; 8526 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8527 } 8528 %} 8529 ins_pipe( pipe_slow ); 8530 %} 8531 8532 instruct vshift16S(vecY dst, vecY src, vecS shift) %{ 8533 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 8534 match(Set dst (LShiftVS src shift)); 8535 match(Set dst (RShiftVS src shift)); 8536 match(Set dst (URShiftVS src shift)); 8537 effect(DEF dst, USE src, USE shift); 8538 format %{ "vshiftw $dst,$src,$shift\t! shift packed16S" %} 8539 ins_encode %{ 8540 int vector_len = 1; 8541 int opcode = this->as_Mach()->ideal_Opcode(); 8542 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8543 %} 8544 ins_pipe( pipe_slow ); 8545 %} 8546 8547 instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{ 8548 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 8549 match(Set dst (LShiftVS src shift)); 8550 match(Set dst (RShiftVS src shift)); 8551 match(Set dst (URShiftVS src shift)); 8552 effect(DEF dst, USE src, USE shift); 8553 format %{ "vshiftw $dst,$src,$shift\t! shift packed32S" %} 8554 ins_encode %{ 8555 int vector_len = 2; 8556 int opcode = this->as_Mach()->ideal_Opcode(); 8557 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8558 %} 8559 ins_pipe( pipe_slow ); 8560 %} 8561
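// All of the short/char rules above funnel three different ideal shift nodes
// into one encoding and let the vshiftw() macro-assembler helper pick the
// actual packed instruction from the captured ideal opcode. A minimal
// stand-alone C++ sketch of that dispatch (shiftw_insn and the Op_* enum here
// are hypothetical stand-ins for illustration; the real helper also handles
// the AVX register forms):
#include <cstdio>

enum IdealOp { Op_LShiftVS, Op_RShiftVS, Op_URShiftVS };

static const char* shiftw_insn(int opcode) {
  switch (opcode) {
    case Op_LShiftVS:  return "psllw"; // packed word left shift
    case Op_RShiftVS:  return "psraw"; // packed word arithmetic right shift
    case Op_URShiftVS: return "psrlw"; // packed word logical right shift
    default:           return "?";
  }
}

int main() {
  printf("%s\n", shiftw_insn(Op_RShiftVS)); // prints "psraw"
  return 0;
}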
8562 // Integers vector shift 8563 instruct vshift2I(vecD dst, vecD src, vecS shift) %{ 8564 predicate(n->as_Vector()->length() == 2); 8565 match(Set dst (LShiftVI src shift)); 8566 match(Set dst (RShiftVI src shift)); 8567 match(Set dst (URShiftVI src shift)); 8568 effect(TEMP dst, USE src, USE shift); 8569 format %{ "vshiftd $dst,$src,$shift\t! shift packed2I" %} 8570 ins_encode %{ 8571 int opcode = this->as_Mach()->ideal_Opcode(); 8572 if (UseAVX == 0) { 8573 if ($dst$$XMMRegister != $src$$XMMRegister) 8574 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 8575 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8576 } else { 8577 int vector_len = 0; 8578 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8579 } 8580 %} 8581 ins_pipe( pipe_slow ); 8582 %} 8583 8584 instruct vshift4I(vecX dst, vecX src, vecS shift) %{ 8585 predicate(n->as_Vector()->length() == 4); 8586 match(Set dst (LShiftVI src shift)); 8587 match(Set dst (RShiftVI src shift)); 8588 match(Set dst (URShiftVI src shift)); 8589 effect(TEMP dst, USE src, USE shift); 8590 format %{ "vshiftd $dst,$src,$shift\t! shift packed4I" %} 8591 ins_encode %{ 8592 int opcode = this->as_Mach()->ideal_Opcode(); 8593 if (UseAVX == 0) { 8594 if ($dst$$XMMRegister != $src$$XMMRegister) 8595 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 8596 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8597 } else { 8598 int vector_len = 0; 8599 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8600 } 8601 %} 8602 ins_pipe( pipe_slow ); 8603 %} 8604 8605 instruct vshift8I(vecY dst, vecY src, vecS shift) %{ 8606 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 8607 match(Set dst (LShiftVI src shift)); 8608 match(Set dst (RShiftVI src shift)); 8609 match(Set dst (URShiftVI src shift)); 8610 effect(DEF dst, USE src, USE shift); 8611 format %{ "vshiftd $dst,$src,$shift\t! shift packed8I" %} 8612 ins_encode %{ 8613 int vector_len = 1; 8614 int opcode = this->as_Mach()->ideal_Opcode(); 8615 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8616 %} 8617 ins_pipe( pipe_slow ); 8618 %} 8619 8620 instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{ 8621 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 8622 match(Set dst (LShiftVI src shift)); 8623 match(Set dst (RShiftVI src shift)); 8624 match(Set dst (URShiftVI src shift)); 8625 effect(DEF dst, USE src, USE shift); 8626 format %{ "vshiftd $dst,$src,$shift\t! shift packed16I" %} 8627 ins_encode %{ 8628 int vector_len = 2; 8629 int opcode = this->as_Mach()->ideal_Opcode(); 8630 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8631 %} 8632 ins_pipe( pipe_slow ); 8633 %} 8634
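// The 128- and 256-bit long rules below (vshift2L, vshift4L) deliberately do
// not match RShiftVL: SSE2/AVX2 provide no packed 64-bit arithmetic right
// shift, so that case is handled by the vsra*_reg rules further down (or by
// evpsraq when AVX-512 is available, as in vshift8L). A scalar C++ sketch of
// the psrlq/pxor/psubq emulation those vsra rules use (illustration only):
#include <cstdint>
#include <cassert>

// Arithmetic right shift built from logical shifts: shift the value and the
// sign-bit mask logically, then xor/subtract to re-extend the sign.
static int64_t sra_emulated(int64_t x, unsigned s) {
  uint64_t r = (uint64_t)x >> s;            // psrlq $dst,$shift
  uint64_t m = 0x8000000000000000ULL >> s;  // psrlq on the sign-bit mask
  return (int64_t)((r ^ m) - m);            // pxor + psubq
}

int main() {
  assert(sra_emulated(-2, 1) == -1);           // matches -2 >> 1
  assert(sra_emulated(1000, 3) == 125);        // positive values unaffected
  assert(sra_emulated(INT64_MIN, 63) == -1);   // sign propagates all the way
  return 0;
}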
8635 // Longs vector shift 8636 instruct vshift2L(vecX dst, vecX src, vecS shift) %{ 8637 predicate(n->as_Vector()->length() == 2); 8638 match(Set dst (LShiftVL src shift)); 8639 match(Set dst (URShiftVL src shift)); 8640 effect(TEMP dst, USE src, USE shift); 8641 format %{ "vshiftq $dst,$src,$shift\t! shift packed2L" %} 8642 ins_encode %{ 8643 int opcode = this->as_Mach()->ideal_Opcode(); 8644 if (UseAVX == 0) { 8645 if ($dst$$XMMRegister != $src$$XMMRegister) 8646 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 8647 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 8648 } else { 8649 int vector_len = 0; 8650 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8651 } 8652 %} 8653 ins_pipe( pipe_slow ); 8654 %} 8655 8656 instruct vshift4L(vecY dst, vecY src, vecS shift) %{ 8657 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8658 match(Set dst (LShiftVL src shift)); 8659 match(Set dst (URShiftVL src shift)); 8660 effect(DEF dst, USE src, USE shift); 8661 format %{ "vshiftq $dst,$src,$shift\t! shift packed4L" %} 8662 ins_encode %{ 8663 int vector_len = 1; 8664 int opcode = this->as_Mach()->ideal_Opcode(); 8665 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8666 %} 8667 ins_pipe( pipe_slow ); 8668 %} 8669 8670 instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{ 8671 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8672 match(Set dst (LShiftVL src shift)); 8673 match(Set dst (RShiftVL src shift)); 8674 match(Set dst (URShiftVL src shift)); 8675 effect(DEF dst, USE src, USE shift); 8676 format %{ "vshiftq $dst,$src,$shift\t! shift packed8L" %} 8677 ins_encode %{ 8678 int vector_len = 2; 8679 int opcode = this->as_Mach()->ideal_Opcode(); 8680 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8681 %} 8682 ins_pipe( pipe_slow ); 8683 %} 8684 8685 // -------------------ArithmeticRightShift ----------------------------------- 8686 // Long vector arithmetic right shift 8687 instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{ 8688 predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); 8689 match(Set dst (RShiftVL src shift)); 8690 effect(TEMP dst, TEMP tmp, TEMP scratch); 8691 format %{ "movdqu $dst,$src\n\t" 8692 "psrlq $dst,$shift\n\t" 8693 "movdqu $tmp,[0x8000000000000000]\n\t" 8694 "psrlq $tmp,$shift\n\t" 8695 "pxor $dst,$tmp\n\t" 8696 "psubq $dst,$tmp\t! arithmetic right shift packed2L" %} 8697 ins_encode %{ 8698 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 8699 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 8700 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 8701 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 8702 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 8703 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 8704 %} 8705 ins_pipe( pipe_slow ); 8706 %} 8707 8708 instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{ 8709 predicate(UseAVX > 2 && n->as_Vector()->length() == 2); 8710 match(Set dst (RShiftVL src shift)); 8711 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %} 8712 ins_encode %{ 8713 int vector_len = 0; 8714 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8715 %} 8716 ins_pipe( pipe_slow ); 8717 %} 8718 8719 instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{ 8720 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 8721 match(Set dst (RShiftVL src shift)); 8722 effect(TEMP dst, TEMP tmp, TEMP scratch); 8723 format %{ "vpsrlq $dst,$src,$shift\n\t" 8724 "vmovdqu $tmp,[0x8000000000000000]\n\t" 8725 "vpsrlq $tmp,$tmp,$shift\n\t" 8726 "vpxor $dst,$dst,$tmp\n\t" 8727 "vpsubq $dst,$dst,$tmp\t!
instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "movdqu $dst,$src\n\t"
            "psrlq $dst,$shift\n\t"
            "movdqu $tmp,[0x8000000000000000]\n\t"
            "psrlq $tmp,$shift\n\t"
            "pxor $dst,$tmp\n\t"
            "psubq $dst,$tmp\t! arithmetic right shift packed2L" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vpsrlq $dst,$src,$shift\n\t"
            "vmovdqu $tmp,[0x8000000000000000]\n\t"
            "vpsrlq $tmp,$tmp,$shift\n\t"
            "vpxor $dst,$dst,$tmp\n\t"
            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVL src shift));
  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------
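// The bitwise AND/OR/XOR rules below are lane-size agnostic: the 4- and
// 8-byte forms still execute a full 128-bit (v)pand/(v)por/(v)pxor, which
// is safe because lanes above the vector length are never observed. With
// UseAVX == 0 only the destructive two-operand form is matched, e.g.
// (Set dst (AndV dst src)); with AVX the three-operand form is used, and
// the _mem variants fold the second source (LoadVector mem) into the
// instruction's memory operand.
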
instruct vand4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------
instruct vor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabs4B_reg(vecS dst, vecS src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVB src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed4B" %}
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8B_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVB src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %}
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16B_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVB src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %}
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32B_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AbsVB src));
  format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs64B_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
  match(Set dst (AbsVB src));
  format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2S_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVS src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed2S" %}
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4S_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVS src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %}
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8S_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVS src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %}
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16S_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVS src));
  format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32S_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32);
  match(Set dst (AbsVS src));
  format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
"vpabsw $dst,$src\t# $dst = |$src| abs packed32S" %} 9275 ins_encode %{ 9276 int vector_len = 2; 9277 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9278 %} 9279 ins_pipe( pipe_slow ); 9280 %} 9281 9282 instruct vabs2I_reg(vecD dst, vecD src) %{ 9283 predicate(UseSSE > 2 && n->as_Vector()->length() == 2); 9284 match(Set dst (AbsVI src)); 9285 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %} 9286 ins_encode %{ 9287 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 9288 %} 9289 ins_pipe( pipe_slow ); 9290 %} 9291 9292 instruct vabs4I_reg(vecX dst, vecX src) %{ 9293 predicate(UseSSE > 2 && n->as_Vector()->length() == 4); 9294 match(Set dst (AbsVI src)); 9295 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %} 9296 ins_encode %{ 9297 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 9298 %} 9299 ins_pipe( pipe_slow ); 9300 %} 9301 9302 instruct vabs8I_reg(vecY dst, vecY src) %{ 9303 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9304 match(Set dst (AbsVI src)); 9305 format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %} 9306 ins_encode %{ 9307 int vector_len = 1; 9308 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9309 %} 9310 ins_pipe( pipe_slow ); 9311 %} 9312 9313 instruct vabs16I_reg(vecZ dst, vecZ src) %{ 9314 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9315 match(Set dst (AbsVI src)); 9316 format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed16I" %} 9317 ins_encode %{ 9318 int vector_len = 2; 9319 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9320 %} 9321 ins_pipe( pipe_slow ); 9322 %} 9323 9324 instruct vabs2L_reg(vecX dst, vecX src) %{ 9325 predicate(UseAVX > 2 && n->as_Vector()->length() == 2); 9326 match(Set dst (AbsVL src)); 9327 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %} 9328 ins_encode %{ 9329 int vector_len = 0; 9330 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9331 %} 9332 ins_pipe( pipe_slow ); 9333 %} 9334 9335 instruct vabs4L_reg(vecY dst, vecY src) %{ 9336 predicate(UseAVX > 2 && n->as_Vector()->length() == 4); 9337 match(Set dst (AbsVL src)); 9338 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %} 9339 ins_encode %{ 9340 int vector_len = 1; 9341 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9342 %} 9343 ins_pipe( pipe_slow ); 9344 %} 9345 9346 instruct vabs8L_reg(vecZ dst, vecZ src) %{ 9347 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9348 match(Set dst (AbsVL src)); 9349 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %} 9350 ins_encode %{ 9351 int vector_len = 2; 9352 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9353 %} 9354 ins_pipe( pipe_slow ); 9355 %} 9356 9357 // --------------------------------- ABSNEG -------------------------------------- 9358 9359 instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{ 9360 predicate(UseSSE >= 2 && n->as_Vector()->length() == 2); 9361 match(Set dst (AbsVD src)); 9362 match(Set dst (NegVD src)); 9363 effect(TEMP scratch); 9364 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed2D" %} 9365 ins_encode %{ 9366 int opcode = this->as_Mach()->ideal_Opcode(); 9367 if ($dst$$XMMRegister != $src$$XMMRegister) 9368 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 9369 __ vabsnegd(opcode, $dst$$XMMRegister, $scratch$$Register); 9370 %} 9371 ins_pipe( pipe_slow ); 9372 %} 9373 9374 instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{ 9375 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9376 match(Set dst (AbsVD src)); 9377 match(Set 
instruct vabsneg2D(vecX dst, vecX src, rRegI scratch) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed2D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if ($dst$$XMMRegister != $src$$XMMRegister)
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ vabsnegd(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4D(vecY dst, vecY src, rRegI scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed4D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 1;
    __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg8D(vecZ dst, vecZ src, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packed8D" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 2;
    __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg2F(vecD dst, vecD src, rRegI scratch) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed2F" %}
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    if ($dst$$XMMRegister != $src$$XMMRegister)
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vecX dst, rRegI scratch) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg8F(vecY dst, vecY src, rRegI scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed8F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 1;
    __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg16F(vecZ dst, vecZ src, rRegI scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packed16F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->as_Mach()->ideal_Opcode();
    int vector_len = 2;
    __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
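// The FmaV nodes are matched with c as both an input and the result,
// (Set c (FmaVD c (Binary a b))), so the encodings below compute
// c = a * b + c in place; the _mem variants fold the load of b into the
// FMA's memory operand.
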
// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- PopCount ---------------------------------
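// vpopcntd counts the set bits in each 32-bit lane. All four widths are
// gated on the AVX512_VPOPCNTDQ feature and the UsePopCountInstruction
// flag, with vector_len selecting the 128/256/512-bit encoding.
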
instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}