//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
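//
// For example, the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// reads: save-on-call under both the allocator's and the C calling
// convention, spilled/reloaded as a float (Op_RegF), encoding 0, and
// backed by the VM register handle for xmm0.  The XMM0b-XMM0p entries
// name the remaining 32-bit words of the same physical register.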
// XMM registers.  512-bit registers of 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 are preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
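
// Note: each XMM register contributes 16 word-sized (32-bit) slots,
// labeled (a)-(p), to the allocation chunk above, matching the 512-bit
// EVEX width; XMM16-XMM31 exist only on 64-bit, EVEX-capable builds.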

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for EVEX 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for EVEX 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for EVEX 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 256bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for EVEX 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
1081 // 1082 // To keep related declarations/definitions/uses close together, 1083 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1084 1085 class NativeJump; 1086 1087 class CallStubImpl { 1088 1089 //-------------------------------------------------------------- 1090 //---< Used for optimization in Compile::shorten_branches >--- 1091 //-------------------------------------------------------------- 1092 1093 public: 1094 // Size of call trampoline stub. 1095 static uint size_call_trampoline() { 1096 return 0; // no call trampolines on this platform 1097 } 1098 1099 // number of relocations needed by a call trampoline stub 1100 static uint reloc_call_trampoline() { 1101 return 0; // no call trampolines on this platform 1102 } 1103 }; 1104 1105 class HandlerImpl { 1106 1107 public: 1108 1109 static int emit_exception_handler(CodeBuffer &cbuf); 1110 static int emit_deopt_handler(CodeBuffer& cbuf); 1111 1112 static uint size_exception_handler() { 1113 // NativeCall instruction size is the same as NativeJump. 1114 // The exception handler starts out as a jump and can be patched to 1115 // a call by deoptimization. (4932387) 1116 // Note that this value is also credited (in output.cpp) to 1117 // the size of the code section. 1118 return NativeJump::instruction_size; 1119 } 1120 1121 #ifdef _LP64 1122 static uint size_deopt_handler() { 1123 // three 5 byte instructions 1124 return 15; 1125 } 1126 #else 1127 static uint size_deopt_handler() { 1128 // NativeCall instruction size is the same as NativeJump. 1129 // The exception handler starts out as a jump and can be patched to 1130 // a call by deoptimization. (4932387) 1131 // Note that this value is also credited (in output.cpp) to 1132 // the size of the code section. 1133 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1134 } 1135 #endif 1136 }; 1137 1138 %} // end source_hpp 1139 1140 source %{ 1141 1142 #include "opto/addnode.hpp" 1143 1144 // Emit exception handler code. 1145 // Stuff framesize into a register and call a VM stub routine. 1146 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1147 1148 // Note that the code buffer's insts_mark is always relative to insts. 1149 // That's why we must use the macroassembler to generate a handler. 1150 MacroAssembler _masm(&cbuf); 1151 address base = __ start_a_stub(size_exception_handler()); 1152 if (base == NULL) { 1153 ciEnv::current()->record_failure("CodeCache is full"); 1154 return 0; // CodeBuffer::expand failed 1155 } 1156 int offset = __ offset(); 1157 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1158 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1159 __ end_a_stub(); 1160 return offset; 1161 } 1162 1163 // Emit deopt handler code. 1164 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1165 1166 // Note that the code buffer's insts_mark is always relative to insts. 1167 // That's why we must use the macroassembler to generate a handler. 1168 MacroAssembler _masm(&cbuf); 1169 address base = __ start_a_stub(size_deopt_handler()); 1170 if (base == NULL) { 1171 ciEnv::current()->record_failure("CodeCache is full"); 1172 return 0; // CodeBuffer::expand failed 1173 } 1174 int offset = __ offset(); 1175 1176 #ifdef _LP64 1177 address the_pc = (address) __ pc(); 1178 Label next; 1179 // push "the_pc" on the stack without destroying any registers, 1180 // as they all may be live.
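// (Illustrative walk-through, not emitted code: "call next" is a 5-byte call with a zero displacement, so it pushes its own return address, i.e. the address of the instruction right after it, and falls through to "next". At that point [rsp] holds the_pc plus the size of the call, and the subptr below rewinds the saved value by the number of bytes emitted since the handler start, leaving exactly the_pc on the stack.)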
1181 1182 // push address of "next" 1183 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1184 __ bind(next); 1185 // adjust it so it matches "the_pc" 1186 __ subptr(Address(rsp, 0), __ offset() - offset); 1187 #else 1188 InternalAddress here(__ pc()); 1189 __ pushptr(here.addr()); 1190 #endif 1191 1192 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1193 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1194 __ end_a_stub(); 1195 return offset; 1196 } 1197 1198 1199 //============================================================================= 1200 1201 // Float masks come from different places depending on platform. 1202 #ifdef _LP64 1203 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1204 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1205 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1206 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1207 #else 1208 static address float_signmask() { return (address)float_signmask_pool; } 1209 static address float_signflip() { return (address)float_signflip_pool; } 1210 static address double_signmask() { return (address)double_signmask_pool; } 1211 static address double_signflip() { return (address)double_signflip_pool; } 1212 #endif 1213 1214 1215 const bool Matcher::match_rule_supported(int opcode) { 1216 if (!has_match_rule(opcode)) 1217 return false; 1218 1219 bool ret_value = true; 1220 switch (opcode) { 1221 case Op_PopCountI: 1222 case Op_PopCountL: 1223 if (!UsePopCountInstruction) 1224 ret_value = false; 1225 break; 1226 case Op_MulVI: 1227 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1228 ret_value = false; 1229 break; 1230 case Op_MulVL: 1231 case Op_MulReductionVL: 1232 if (VM_Version::supports_avx512dq() == false) 1233 ret_value = false; 1234 break; 1235 case Op_AddReductionVL: 1236 if (UseAVX < 3) // only with EVEX: vector connectivity becomes an issue here 1237 ret_value = false; 1238 break; 1239 case Op_AddReductionVI: 1240 if (UseSSE < 3) // requires at least SSE3 1241 ret_value = false; 1242 break; 1243 case Op_MulReductionVI: 1244 if (UseSSE < 4) // requires at least SSE4 1245 ret_value = false; 1246 break; 1247 case Op_AddReductionVF: 1248 case Op_AddReductionVD: 1249 case Op_MulReductionVF: 1250 case Op_MulReductionVD: 1251 if (UseSSE < 1) // requires at least SSE 1252 ret_value = false; 1253 break; 1254 case Op_SqrtVD: 1255 if (UseAVX < 1) // enabled for AVX only 1256 ret_value = false; 1257 break; 1258 case Op_CompareAndSwapL: 1259 #ifdef _LP64 1260 case Op_CompareAndSwapP: 1261 #endif 1262 if (!VM_Version::supports_cx8()) 1263 ret_value = false; 1264 break; 1265 case Op_CMoveVD: 1266 if (UseAVX < 1 || UseAVX > 2) 1267 ret_value = false; 1268 break; 1269 case Op_StrIndexOf: 1270 if (!UseSSE42Intrinsics) 1271 ret_value = false; 1272 break; 1273 case Op_StrIndexOfChar: 1274 if (!UseSSE42Intrinsics) 1275 ret_value = false; 1276 break; 1277 case Op_OnSpinWait: 1278 if (VM_Version::supports_on_spin_wait() == false) 1279 ret_value = false; 1280 break; 1281 } 1282 1283 return ret_value; // By default, match rules are supported. 1284 } 1285 1286 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { 1287 // identify extra cases that we might want to provide match rules for 1288 // e.g.
Op_* vector nodes and other intrinsics, guarded by vlen 1289 bool ret_value = match_rule_supported(opcode); 1290 if (ret_value) { 1291 switch (opcode) { 1292 case Op_AddVB: 1293 case Op_SubVB: 1294 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1295 ret_value = false; 1296 break; 1297 case Op_URShiftVS: 1298 case Op_RShiftVS: 1299 case Op_LShiftVS: 1300 case Op_MulVS: 1301 case Op_AddVS: 1302 case Op_SubVS: 1303 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1304 ret_value = false; 1305 break; 1306 case Op_CMoveVD: 1307 if (vlen != 4) 1308 ret_value = false; 1309 break; 1310 } 1311 } 1312 1313 return ret_value; // By default, match rules are supported. 1314 } 1315 1316 const bool Matcher::has_predicated_vectors(void) { 1317 bool ret_value = false; 1318 if (UseAVX > 2) { 1319 ret_value = VM_Version::supports_avx512vl(); 1320 } 1321 1322 return ret_value; 1323 } 1324 1325 const int Matcher::float_pressure(int default_pressure_threshold) { 1326 int float_pressure_threshold = default_pressure_threshold; 1327 #ifdef _LP64 1328 if (UseAVX > 2) { 1329 // Increase pressure threshold on machines with AVX3 which have 1330 // 2x more XMM registers. 1331 float_pressure_threshold = default_pressure_threshold * 2; 1332 } 1333 #endif 1334 return float_pressure_threshold; 1335 } 1336 1337 // Max vector size in bytes. 0 if not supported. 1338 const int Matcher::vector_width_in_bytes(BasicType bt) { 1339 assert(is_java_primitive(bt), "only primitive type vectors"); 1340 if (UseSSE < 2) return 0; 1341 // SSE2 supports 128bit vectors for all types. 1342 // AVX2 supports 256bit vectors for all types. 1343 // EVEX (AVX-512) supports 512bit vectors for all types. 1344 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1345 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1346 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1347 size = (UseAVX > 2) ? 64 : 32; 1348 // Use flag to limit vector size. 1349 size = MIN2(size,(int)MaxVectorSize); 1350 // Minimum 2 values in vector (or 4 for bytes). 1351 switch (bt) { 1352 case T_DOUBLE: 1353 case T_LONG: 1354 if (size < 16) return 0; 1355 break; 1356 case T_FLOAT: 1357 case T_INT: 1358 if (size < 8) return 0; 1359 break; 1360 case T_BOOLEAN: 1361 if (size < 4) return 0; 1362 break; 1363 case T_CHAR: 1364 if (size < 4) return 0; 1365 break; 1366 case T_BYTE: 1367 if (size < 4) return 0; 1368 break; 1369 case T_SHORT: 1370 if (size < 4) return 0; 1371 break; 1372 default: 1373 ShouldNotReachHere(); 1374 } 1375 return size; 1376 } 1377 1378 // Limits on vector size (number of elements) loaded into vector. 1379 const int Matcher::max_vector_size(const BasicType bt) { 1380 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1381 } 1382 const int Matcher::min_vector_size(const BasicType bt) { 1383 int max_size = max_vector_size(bt); 1384 // Min size which can be loaded into vector is 4 bytes. 1385 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 1386 return MIN2(size,max_size); 1387 } 1388 1389 // Vector ideal reg corresponding to specified size in bytes 1390 const uint Matcher::vector_ideal_reg(int size) { 1391 switch(size) { 1392 case 4: return Op_VecS; 1393 case 8: return Op_VecD; 1394 case 16: return Op_VecX; 1395 case 32: return Op_VecY; 1396 case 64: return Op_VecZ; 1397 } 1398 ShouldNotReachHere(); 1399 return 0; 1400 } 1401 1402 // Only lowest bits of xmm reg are used for vector shift count.
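// (For example, psllw/pslld/psllq and their AVX/EVEX forms read the shift count from the low 64 bits of an XMM register regardless of how wide the shifted vector is, so a 32-bit VecS ideal register is always sufficient to describe the count.)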
1403 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1404 return Op_VecS; 1405 } 1406 1407 // x86 supports misaligned vector stores/loads. 1408 const bool Matcher::misaligned_vectors_ok() { 1409 return !AlignVector; // can be changed by flag 1410 } 1411 1412 // x86 AES instructions are compatible with SunJCE expanded 1413 // keys, hence we do not need to pass the original key to stubs 1414 const bool Matcher::pass_original_key_for_aes() { 1415 return false; 1416 } 1417 1418 1419 const bool Matcher::convi2l_type_required = true; 1420 1421 // Check for shift by small constant as well 1422 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1423 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1424 shift->in(2)->get_int() <= 3 && 1425 // Are there other uses besides address expressions? 1426 !matcher->is_visited(shift)) { 1427 address_visited.set(shift->_idx); // Flag as address_visited 1428 mstack.push(shift->in(2), Matcher::Visit); 1429 Node *conv = shift->in(1); 1430 #ifdef _LP64 1431 // Allow the Matcher to match the rule which bypasses the 1432 // ConvI2L operation for an array index on LP64 1433 // if the index value is positive. 1434 if (conv->Opcode() == Op_ConvI2L && 1435 conv->as_Type()->type()->is_long()->_lo >= 0 && 1436 // Are there other uses besides address expressions? 1437 !matcher->is_visited(conv)) { 1438 address_visited.set(conv->_idx); // Flag as address_visited 1439 mstack.push(conv->in(1), Matcher::Pre_Visit); 1440 } else 1441 #endif 1442 mstack.push(conv, Matcher::Pre_Visit); 1443 return true; 1444 } 1445 return false; 1446 } 1447 1448 // Should the Matcher clone shifts on addressing modes, expecting them 1449 // to be subsumed into complex addressing expressions or compute them 1450 // into registers? 1451 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1452 Node *off = m->in(AddPNode::Offset); 1453 if (off->is_Con()) { 1454 address_visited.test_set(m->_idx); // Flag as address_visited 1455 Node *adr = m->in(AddPNode::Address); 1456 1457 // Intel can handle 2 adds in addressing mode 1458 // AtomicAdd is not an addressing expression. 1459 // Cheap to find it by looking for screwy base. 1460 if (adr->is_AddP() && 1461 !adr->in(AddPNode::Base)->is_top() && 1462 // Are there other uses besides address expressions? 1463 !is_visited(adr)) { 1464 address_visited.set(adr->_idx); // Flag as address_visited 1465 Node *shift = adr->in(AddPNode::Offset); 1466 if (!clone_shift(shift, this, mstack, address_visited)) { 1467 mstack.push(shift, Pre_Visit); 1468 } 1469 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1470 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1471 } else { 1472 mstack.push(adr, Pre_Visit); 1473 } 1474 1475 // Clone X+offset as it also folds into most addressing expressions 1476 mstack.push(off, Visit); 1477 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1478 return true; 1479 } else if (clone_shift(off, this, mstack, address_visited)) { 1480 address_visited.test_set(m->_idx); // Flag as address_visited 1481 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1482 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1483 return true; 1484 } 1485 return false; 1486 } 1487 1488 void Compile::reshape_address(AddPNode* addp) { 1489 } 1490 1491 // Helper methods for MachSpillCopyNode::implementation().
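// (Overview of the two helpers below: vec_mov_helper emits (or, on the 32-bit VM, sizes and formats) a register-to-register vector copy, and vec_spill_helper does the same for loads and stores between a vector register and a stack slot. The instruction is picked by ideal register: VecS/VecD/VecX use the 128-bit movdqu, VecY uses vmovdqu, and VecZ uses evmovdquq.)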
1492 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1493 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1494 // In the 64-bit VM, size calculation is very complex, so instructions are 1495 // emitted into a scratch buffer to determine their size. 1496 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1497 assert(ireg == Op_VecS || // 32bit vector 1498 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1499 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1500 "no non-adjacent vector moves" ); 1501 if (cbuf) { 1502 MacroAssembler _masm(cbuf); 1503 int offset = __ offset(); 1504 switch (ireg) { 1505 case Op_VecS: // copy whole register 1506 case Op_VecD: 1507 case Op_VecX: 1508 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1509 break; 1510 case Op_VecY: 1511 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1512 break; 1513 case Op_VecZ: 1514 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1515 break; 1516 default: 1517 ShouldNotReachHere(); 1518 } 1519 int size = __ offset() - offset; 1520 #ifdef ASSERT 1521 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1522 assert(!do_size || size == 4, "incorrect size calculation"); 1523 #endif 1524 return size; 1525 #ifndef PRODUCT 1526 } else if (!do_size) { 1527 switch (ireg) { 1528 case Op_VecS: 1529 case Op_VecD: 1530 case Op_VecX: 1531 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1532 break; 1533 case Op_VecY: 1534 case Op_VecZ: 1535 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1536 break; 1537 default: 1538 ShouldNotReachHere(); 1539 } 1540 #endif 1541 } 1542 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1543 return (UseAVX > 2) ? 6 : 4; 1544 } 1545 1546 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1547 int stack_offset, int reg, uint ireg, outputStream* st) { 1548 // In the 64-bit VM, size calculation is very complex, so instructions are 1549 // emitted into a scratch buffer to determine their size.
1550 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1551 if (cbuf) { 1552 MacroAssembler _masm(cbuf); 1553 int offset = __ offset(); 1554 if (is_load) { 1555 switch (ireg) { 1556 case Op_VecS: 1557 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1558 break; 1559 case Op_VecD: 1560 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1561 break; 1562 case Op_VecX: 1563 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1564 break; 1565 case Op_VecY: 1566 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1567 break; 1568 case Op_VecZ: 1569 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1570 break; 1571 default: 1572 ShouldNotReachHere(); 1573 } 1574 } else { // store 1575 switch (ireg) { 1576 case Op_VecS: 1577 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1578 break; 1579 case Op_VecD: 1580 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1581 break; 1582 case Op_VecX: 1583 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1584 break; 1585 case Op_VecY: 1586 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1587 break; 1588 case Op_VecZ: 1589 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1590 break; 1591 default: 1592 ShouldNotReachHere(); 1593 } 1594 } 1595 int size = __ offset() - offset; 1596 #ifdef ASSERT 1597 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1598 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
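// (Reading the expected-size expression above: a zero stack_offset adds no displacement byte, an offset below 0x80 fits in a one-byte disp8, and anything larger takes a four-byte disp32, accounted as 6 here when the longer EVEX encoding applies.)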
1599 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1600 #endif 1601 return size; 1602 #ifndef PRODUCT 1603 } else if (!do_size) { 1604 if (is_load) { 1605 switch (ireg) { 1606 case Op_VecS: 1607 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1608 break; 1609 case Op_VecD: 1610 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1611 break; 1612 case Op_VecX: 1613 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1614 break; 1615 case Op_VecY: 1616 case Op_VecZ: 1617 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1618 break; 1619 default: 1620 ShouldNotReachHere(); 1621 } 1622 } else { // store 1623 switch (ireg) { 1624 case Op_VecS: 1625 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1626 break; 1627 case Op_VecD: 1628 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1629 break; 1630 case Op_VecX: 1631 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1632 break; 1633 case Op_VecY: 1634 case Op_VecZ: 1635 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1636 break; 1637 default: 1638 ShouldNotReachHere(); 1639 } 1640 } 1641 #endif 1642 } 1643 bool is_single_byte = false; 1644 int vec_len = 0; 1645 if ((UseAVX > 2) && (stack_offset != 0)) { 1646 int tuple_type = Assembler::EVEX_FVM; 1647 int input_size = Assembler::EVEX_32bit; 1648 switch (ireg) { 1649 case Op_VecS: 1650 tuple_type = Assembler::EVEX_T1S; 1651 break; 1652 case Op_VecD: 1653 tuple_type = Assembler::EVEX_T1S; 1654 input_size = Assembler::EVEX_64bit; 1655 break; 1656 case Op_VecX: 1657 break; 1658 case Op_VecY: 1659 vec_len = 1; 1660 break; 1661 case Op_VecZ: 1662 vec_len = 2; 1663 break; 1664 } 1665 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 1666 } 1667 int offset_size = 0; 1668 int size = 5; 1669 if (UseAVX > 2) { 1670 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 1671 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1672 size += 2; // Need an additional two bytes for EVEX encoding 1673 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 1674 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1675 } else { 1676 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1677 size += 2; // Need an additional two bytes for EVEX encoding 1678 } 1679 } else { 1680 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1681 } 1682 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1683 return size + offset_size; 1684 } 1685 1686 static inline jint replicate4_imm(int con, int width) { 1687 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1688 assert(width == 1 || width == 2, "only byte or short types here"); 1689 int bit_width = width * 8; 1690 jint val = con; 1691 val &= (1 << bit_width) - 1; // mask off sign bits 1692 while(bit_width < 32) { 1693 val |= (val << bit_width); 1694 bit_width <<= 1; 1695 } 1696 return val; 1697 } 1698 1699 static inline jlong replicate8_imm(int con, int width) { 1700 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
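// (Worked example: replicate8_imm(0xAB, 1) masks the constant to 0xAB and then doubles it up: 0xAB, 0xABAB, 0xABABABAB, 0xABABABABABABABAB. Likewise replicate4_imm(0x1234, 2) yields 0x12341234.)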
1701 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1702 int bit_width = width * 8; 1703 jlong val = con; 1704 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1705 while(bit_width < 64) { 1706 val |= (val << bit_width); 1707 bit_width <<= 1; 1708 } 1709 return val; 1710 } 1711 1712 #ifndef PRODUCT 1713 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1714 st->print("nop \t# %d bytes pad for loops and calls", _count); 1715 } 1716 #endif 1717 1718 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1719 MacroAssembler _masm(&cbuf); 1720 __ nop(_count); 1721 } 1722 1723 uint MachNopNode::size(PhaseRegAlloc*) const { 1724 return _count; 1725 } 1726 1727 #ifndef PRODUCT 1728 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1729 st->print("# breakpoint"); 1730 } 1731 #endif 1732 1733 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1734 MacroAssembler _masm(&cbuf); 1735 __ int3(); 1736 } 1737 1738 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1739 return MachNode::size(ra_); 1740 } 1741 1742 %} 1743 1744 encode %{ 1745 1746 enc_class call_epilog %{ 1747 if (VerifyStackAtCalls) { 1748 // Check that stack depth is unchanged: find majik cookie on stack 1749 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1750 MacroAssembler _masm(&cbuf); 1751 Label L; 1752 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1753 __ jccb(Assembler::equal, L); 1754 // Die if stack mismatch 1755 __ int3(); 1756 __ bind(L); 1757 } 1758 %} 1759 1760 %} 1761 1762 1763 //----------OPERANDS----------------------------------------------------------- 1764 // Operand definitions must precede instruction definitions for correct parsing 1765 // in the ADLC because operands constitute user defined types which are used in 1766 // instruction definitions. 1767 1768 // This operand applies generically only to EVEX, so there is only one version. 1769 operand vecZ() %{ 1770 constraint(ALLOC_IN_RC(vectorz_reg)); 1771 match(VecZ); 1772 1773 format %{ %} 1774 interface(REG_INTER); 1775 %} 1776 1777 // Comparison Code for FP conditional move 1778 operand cmpOp_vcmppd() %{ 1779 match(Bool); 1780 1781 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 1782 n->as_Bool()->_test._test != BoolTest::no_overflow); 1783 format %{ "" %} 1784 interface(COND_INTER) %{ 1785 equal (0x0, "eq"); 1786 less (0x1, "lt"); 1787 less_equal (0x2, "le"); 1788 not_equal (0xC, "ne"); 1789 greater_equal(0xD, "ge"); 1790 greater (0xE, "gt"); 1791 // TODO: adlc cannot compile (it breaks) without the next two lines; the error is: 1792 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 1793 // equal' for overflow.
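// (The encodings above appear to be the AVX compare-predicate imm8 values taken by vcmppd: 0x0 = EQ_OQ, 0x1 = LT_OS, 0x2 = LE_OS, 0xC = NEQ_OQ, 0xD = GE_OS, 0xE = GT_OS. The values 0x20 and 0x21 fall outside the 5-bit predicate range, which is why they are marked as unsupported below.)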
1794 overflow (0x20, "o"); // not really supported by the instruction 1795 no_overflow (0x21, "no"); // not really supported by the instruction 1796 %} 1797 %} 1798 1799 1800 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1801 1802 // ============================================================================ 1803 1804 instruct ShouldNotReachHere() %{ 1805 match(Halt); 1806 format %{ "ud2\t# ShouldNotReachHere" %} 1807 ins_encode %{ 1808 __ ud2(); 1809 %} 1810 ins_pipe(pipe_slow); 1811 %} 1812 1813 // =================================EVEX special=============================== 1814 1815 instruct setMask(rRegI dst, rRegI src) %{ 1816 predicate(Matcher::has_predicated_vectors()); 1817 match(Set dst (SetVectMaskI src)); 1818 effect(TEMP dst); 1819 format %{ "setvectmask $dst, $src" %} 1820 ins_encode %{ 1821 __ setvectmask($dst$$Register, $src$$Register); 1822 %} 1823 ins_pipe(pipe_slow); 1824 %} 1825 1826 // ============================================================================ 1827 1828 instruct addF_reg(regF dst, regF src) %{ 1829 predicate((UseSSE>=1) && (UseAVX == 0)); 1830 match(Set dst (AddF dst src)); 1831 1832 format %{ "addss $dst, $src" %} 1833 ins_cost(150); 1834 ins_encode %{ 1835 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1836 %} 1837 ins_pipe(pipe_slow); 1838 %} 1839 1840 instruct addF_mem(regF dst, memory src) %{ 1841 predicate((UseSSE>=1) && (UseAVX == 0)); 1842 match(Set dst (AddF dst (LoadF src))); 1843 1844 format %{ "addss $dst, $src" %} 1845 ins_cost(150); 1846 ins_encode %{ 1847 __ addss($dst$$XMMRegister, $src$$Address); 1848 %} 1849 ins_pipe(pipe_slow); 1850 %} 1851 1852 instruct addF_imm(regF dst, immF con) %{ 1853 predicate((UseSSE>=1) && (UseAVX == 0)); 1854 match(Set dst (AddF dst con)); 1855 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1856 ins_cost(150); 1857 ins_encode %{ 1858 __ addss($dst$$XMMRegister, $constantaddress($con)); 1859 %} 1860 ins_pipe(pipe_slow); 1861 %} 1862 1863 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1864 predicate(UseAVX > 0); 1865 match(Set dst (AddF src1 src2)); 1866 1867 format %{ "vaddss $dst, $src1, $src2" %} 1868 ins_cost(150); 1869 ins_encode %{ 1870 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1871 %} 1872 ins_pipe(pipe_slow); 1873 %} 1874 1875 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1876 predicate(UseAVX > 0); 1877 match(Set dst (AddF src1 (LoadF src2))); 1878 1879 format %{ "vaddss $dst, $src1, $src2" %} 1880 ins_cost(150); 1881 ins_encode %{ 1882 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1883 %} 1884 ins_pipe(pipe_slow); 1885 %} 1886 1887 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1888 predicate(UseAVX > 0); 1889 match(Set dst (AddF src con)); 1890 1891 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1892 ins_cost(150); 1893 ins_encode %{ 1894 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1895 %} 1896 ins_pipe(pipe_slow); 1897 %} 1898 1899 instruct addD_reg(regD dst, regD src) %{ 1900 predicate((UseSSE>=2) && (UseAVX == 0)); 1901 match(Set dst (AddD dst src)); 1902 1903 format %{ "addsd $dst, $src" %} 1904 ins_cost(150); 1905 ins_encode %{ 1906 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1907 %} 1908 ins_pipe(pipe_slow); 1909 %} 1910 1911 instruct addD_mem(regD dst, memory src) %{ 1912 predicate((UseSSE>=2) && (UseAVX == 0)); 1913 match(Set dst (AddD dst (LoadD src))); 1914 1915 
format %{ "addsd $dst, $src" %} 1916 ins_cost(150); 1917 ins_encode %{ 1918 __ addsd($dst$$XMMRegister, $src$$Address); 1919 %} 1920 ins_pipe(pipe_slow); 1921 %} 1922 1923 instruct addD_imm(regD dst, immD con) %{ 1924 predicate((UseSSE>=2) && (UseAVX == 0)); 1925 match(Set dst (AddD dst con)); 1926 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1927 ins_cost(150); 1928 ins_encode %{ 1929 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1930 %} 1931 ins_pipe(pipe_slow); 1932 %} 1933 1934 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1935 predicate(UseAVX > 0); 1936 match(Set dst (AddD src1 src2)); 1937 1938 format %{ "vaddsd $dst, $src1, $src2" %} 1939 ins_cost(150); 1940 ins_encode %{ 1941 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1942 %} 1943 ins_pipe(pipe_slow); 1944 %} 1945 1946 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1947 predicate(UseAVX > 0); 1948 match(Set dst (AddD src1 (LoadD src2))); 1949 1950 format %{ "vaddsd $dst, $src1, $src2" %} 1951 ins_cost(150); 1952 ins_encode %{ 1953 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1954 %} 1955 ins_pipe(pipe_slow); 1956 %} 1957 1958 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1959 predicate(UseAVX > 0); 1960 match(Set dst (AddD src con)); 1961 1962 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1963 ins_cost(150); 1964 ins_encode %{ 1965 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1966 %} 1967 ins_pipe(pipe_slow); 1968 %} 1969 1970 instruct subF_reg(regF dst, regF src) %{ 1971 predicate((UseSSE>=1) && (UseAVX == 0)); 1972 match(Set dst (SubF dst src)); 1973 1974 format %{ "subss $dst, $src" %} 1975 ins_cost(150); 1976 ins_encode %{ 1977 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1978 %} 1979 ins_pipe(pipe_slow); 1980 %} 1981 1982 instruct subF_mem(regF dst, memory src) %{ 1983 predicate((UseSSE>=1) && (UseAVX == 0)); 1984 match(Set dst (SubF dst (LoadF src))); 1985 1986 format %{ "subss $dst, $src" %} 1987 ins_cost(150); 1988 ins_encode %{ 1989 __ subss($dst$$XMMRegister, $src$$Address); 1990 %} 1991 ins_pipe(pipe_slow); 1992 %} 1993 1994 instruct subF_imm(regF dst, immF con) %{ 1995 predicate((UseSSE>=1) && (UseAVX == 0)); 1996 match(Set dst (SubF dst con)); 1997 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1998 ins_cost(150); 1999 ins_encode %{ 2000 __ subss($dst$$XMMRegister, $constantaddress($con)); 2001 %} 2002 ins_pipe(pipe_slow); 2003 %} 2004 2005 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2006 predicate(UseAVX > 0); 2007 match(Set dst (SubF src1 src2)); 2008 2009 format %{ "vsubss $dst, $src1, $src2" %} 2010 ins_cost(150); 2011 ins_encode %{ 2012 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2013 %} 2014 ins_pipe(pipe_slow); 2015 %} 2016 2017 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2018 predicate(UseAVX > 0); 2019 match(Set dst (SubF src1 (LoadF src2))); 2020 2021 format %{ "vsubss $dst, $src1, $src2" %} 2022 ins_cost(150); 2023 ins_encode %{ 2024 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2025 %} 2026 ins_pipe(pipe_slow); 2027 %} 2028 2029 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2030 predicate(UseAVX > 0); 2031 match(Set dst (SubF src con)); 2032 2033 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2034 ins_cost(150); 2035 ins_encode %{ 2036 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2037 %} 2038 ins_pipe(pipe_slow); 2039 %} 2040 2041 instruct subD_reg(regD dst, regD src) %{ 2042 predicate((UseSSE>=2) && (UseAVX == 0)); 2043 match(Set dst (SubD dst src)); 2044 2045 format %{ "subsd $dst, $src" %} 2046 ins_cost(150); 2047 ins_encode %{ 2048 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2049 %} 2050 ins_pipe(pipe_slow); 2051 %} 2052 2053 instruct subD_mem(regD dst, memory src) %{ 2054 predicate((UseSSE>=2) && (UseAVX == 0)); 2055 match(Set dst (SubD dst (LoadD src))); 2056 2057 format %{ "subsd $dst, $src" %} 2058 ins_cost(150); 2059 ins_encode %{ 2060 __ subsd($dst$$XMMRegister, $src$$Address); 2061 %} 2062 ins_pipe(pipe_slow); 2063 %} 2064 2065 instruct subD_imm(regD dst, immD con) %{ 2066 predicate((UseSSE>=2) && (UseAVX == 0)); 2067 match(Set dst (SubD dst con)); 2068 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2069 ins_cost(150); 2070 ins_encode %{ 2071 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2072 %} 2073 ins_pipe(pipe_slow); 2074 %} 2075 2076 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2077 predicate(UseAVX > 0); 2078 match(Set dst (SubD src1 src2)); 2079 2080 format %{ "vsubsd $dst, $src1, $src2" %} 2081 ins_cost(150); 2082 ins_encode %{ 2083 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2084 %} 2085 ins_pipe(pipe_slow); 2086 %} 2087 2088 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2089 predicate(UseAVX > 0); 2090 match(Set dst (SubD src1 (LoadD src2))); 2091 2092 format %{ "vsubsd $dst, $src1, $src2" %} 2093 ins_cost(150); 2094 ins_encode %{ 2095 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2096 %} 2097 ins_pipe(pipe_slow); 2098 %} 2099 2100 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2101 predicate(UseAVX > 0); 2102 match(Set dst (SubD src con)); 2103 2104 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2105 ins_cost(150); 2106 ins_encode %{ 2107 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2108 %} 2109 ins_pipe(pipe_slow); 2110 %} 2111 2112 instruct mulF_reg(regF dst, regF src) %{ 2113 predicate((UseSSE>=1) && (UseAVX == 0)); 2114 match(Set dst (MulF dst src)); 2115 2116 format %{ "mulss $dst, $src" %} 2117 ins_cost(150); 2118 ins_encode %{ 2119 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2120 %} 2121 ins_pipe(pipe_slow); 2122 %} 2123 2124 instruct mulF_mem(regF dst, memory src) %{ 2125 predicate((UseSSE>=1) && (UseAVX == 0)); 2126 match(Set dst (MulF dst (LoadF src))); 2127 2128 format %{ "mulss $dst, $src" %} 2129 ins_cost(150); 2130 ins_encode %{ 2131 __ mulss($dst$$XMMRegister, $src$$Address); 2132 %} 2133 ins_pipe(pipe_slow); 2134 %} 2135 2136 instruct mulF_imm(regF dst, immF con) %{ 2137 predicate((UseSSE>=1) && (UseAVX == 0)); 2138 match(Set dst (MulF dst con)); 2139 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2140 ins_cost(150); 2141 ins_encode %{ 2142 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2143 %} 2144 ins_pipe(pipe_slow); 2145 %} 2146 2147 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2148 predicate(UseAVX > 0); 2149 match(Set dst (MulF src1 src2)); 2150 2151 format %{ "vmulss $dst, $src1, $src2" %} 2152 ins_cost(150); 2153 ins_encode %{ 2154 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2155 %} 2156 ins_pipe(pipe_slow); 2157 %} 2158 2159 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2160 predicate(UseAVX > 0); 2161 match(Set dst (MulF src1 (LoadF src2))); 2162 2163 format %{ "vmulss $dst, $src1, $src2" %} 2164 ins_cost(150); 2165 ins_encode %{ 2166 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2167 %} 2168 ins_pipe(pipe_slow); 2169 %} 2170 2171 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2172 predicate(UseAVX > 0); 2173 match(Set dst (MulF src con)); 2174 2175 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2176 ins_cost(150); 2177 ins_encode %{ 2178 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2179 %} 2180 ins_pipe(pipe_slow); 2181 %} 2182 2183 instruct mulD_reg(regD dst, regD src) %{ 2184 predicate((UseSSE>=2) && (UseAVX == 0)); 2185 match(Set dst (MulD dst src)); 2186 2187 format %{ "mulsd $dst, $src" %} 2188 ins_cost(150); 2189 ins_encode %{ 2190 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2191 %} 2192 ins_pipe(pipe_slow); 2193 %} 2194 2195 instruct mulD_mem(regD dst, memory src) %{ 2196 predicate((UseSSE>=2) && (UseAVX == 0)); 2197 match(Set dst (MulD dst (LoadD src))); 2198 2199 format %{ "mulsd $dst, $src" %} 2200 ins_cost(150); 2201 ins_encode %{ 2202 __ mulsd($dst$$XMMRegister, $src$$Address); 2203 %} 2204 ins_pipe(pipe_slow); 2205 %} 2206 2207 instruct mulD_imm(regD dst, immD con) %{ 2208 predicate((UseSSE>=2) && (UseAVX == 0)); 2209 match(Set dst (MulD dst con)); 2210 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2211 ins_cost(150); 2212 ins_encode %{ 2213 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2214 %} 2215 ins_pipe(pipe_slow); 2216 %} 2217 2218 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2219 predicate(UseAVX > 0); 2220 match(Set dst (MulD src1 src2)); 2221 2222 format %{ "vmulsd $dst, $src1, $src2" %} 2223 ins_cost(150); 2224 ins_encode %{ 2225 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2226 %} 2227 ins_pipe(pipe_slow); 2228 %} 2229 2230 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2231 predicate(UseAVX > 0); 2232 match(Set dst (MulD src1 (LoadD src2))); 2233 2234 format %{ "vmulsd $dst, $src1, $src2" %} 2235 ins_cost(150); 2236 ins_encode %{ 2237 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2238 %} 2239 ins_pipe(pipe_slow); 2240 %} 2241 2242 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2243 predicate(UseAVX > 0); 2244 match(Set dst (MulD src con)); 2245 2246 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2247 ins_cost(150); 2248 ins_encode %{ 2249 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2250 %} 2251 ins_pipe(pipe_slow); 2252 %} 2253 2254 instruct divF_reg(regF dst, regF src) %{ 2255 predicate((UseSSE>=1) && (UseAVX == 0)); 2256 match(Set dst (DivF dst src)); 2257 2258 format %{ "divss $dst, $src" %} 2259 ins_cost(150); 2260 ins_encode %{ 2261 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2262 %} 2263 ins_pipe(pipe_slow); 2264 %} 2265 2266 instruct divF_mem(regF dst, memory src) %{ 2267 predicate((UseSSE>=1) && (UseAVX == 0)); 2268 match(Set dst (DivF dst (LoadF src))); 2269 2270 format %{ "divss $dst, $src" %} 2271 ins_cost(150); 2272 ins_encode %{ 2273 __ divss($dst$$XMMRegister, $src$$Address); 2274 %} 2275 ins_pipe(pipe_slow); 2276 %} 2277 2278 instruct divF_imm(regF dst, immF con) %{ 2279 predicate((UseSSE>=1) && (UseAVX == 0)); 2280 match(Set dst (DivF dst con)); 2281 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2282 ins_cost(150); 2283 ins_encode %{ 2284 __ divss($dst$$XMMRegister, $constantaddress($con)); 2285 %} 2286 ins_pipe(pipe_slow); 2287 %} 2288 2289 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2290 predicate(UseAVX > 0); 2291 match(Set dst (DivF src1 src2)); 2292 2293 format %{ "vdivss $dst, $src1, $src2" %} 2294 ins_cost(150); 2295 ins_encode %{ 2296 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2297 %} 2298 ins_pipe(pipe_slow); 2299 %} 2300 2301 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2302 predicate(UseAVX > 0); 2303 match(Set dst (DivF src1 (LoadF src2))); 2304 2305 format %{ "vdivss $dst, $src1, $src2" %} 2306 ins_cost(150); 2307 ins_encode %{ 2308 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2309 %} 2310 ins_pipe(pipe_slow); 2311 %} 2312 2313 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2314 predicate(UseAVX > 0); 2315 match(Set dst (DivF src con)); 2316 2317 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2318 ins_cost(150); 2319 ins_encode %{ 2320 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2321 %} 2322 ins_pipe(pipe_slow); 2323 %} 2324 2325 instruct divD_reg(regD dst, regD src) %{ 2326 predicate((UseSSE>=2) && (UseAVX == 0)); 2327 match(Set dst (DivD dst src)); 2328 2329 format %{ "divsd $dst, $src" %} 2330 ins_cost(150); 2331 ins_encode %{ 2332 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2333 %} 2334 ins_pipe(pipe_slow); 2335 %} 2336 2337 instruct divD_mem(regD dst, memory src) %{ 2338 predicate((UseSSE>=2) && (UseAVX == 0)); 2339 match(Set dst (DivD dst (LoadD src))); 2340 2341 format %{ "divsd $dst, $src" %} 2342 ins_cost(150); 2343 ins_encode %{ 2344 __ divsd($dst$$XMMRegister, $src$$Address); 2345 %} 2346 ins_pipe(pipe_slow); 2347 %} 2348 2349 instruct divD_imm(regD dst, immD con) %{ 2350 predicate((UseSSE>=2) && (UseAVX == 0)); 2351 match(Set dst (DivD dst con)); 2352 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2353 ins_cost(150); 2354 ins_encode %{ 2355 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2356 %} 2357 ins_pipe(pipe_slow); 2358 %} 2359 2360 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2361 predicate(UseAVX > 0); 2362 match(Set dst (DivD src1 src2)); 2363 2364 format %{ "vdivsd $dst, $src1, $src2" %} 2365 ins_cost(150); 2366 ins_encode %{ 2367 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2368 %} 2369 ins_pipe(pipe_slow); 2370 %} 2371 2372 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2373 predicate(UseAVX > 0); 2374 match(Set dst (DivD src1 (LoadD src2))); 2375 2376 format %{ "vdivsd $dst, $src1, $src2" %} 2377 ins_cost(150); 2378 ins_encode %{ 2379 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2380 %} 2381 ins_pipe(pipe_slow); 2382 %} 2383 2384 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2385 predicate(UseAVX > 0); 2386 match(Set dst (DivD src con)); 2387 2388 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2389 ins_cost(150); 2390 ins_encode %{ 2391 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2392 %} 2393 ins_pipe(pipe_slow); 2394 %} 2395 2396 instruct absF_reg(regF dst) %{ 2397 predicate((UseSSE>=1) && (UseAVX == 0)); 2398 match(Set dst (AbsF dst)); 2399 ins_cost(150); 2400 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%} 2401 ins_encode %{ 2402 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2403 %} 2404 ins_pipe(pipe_slow); 2405 %} 2406 2407 instruct absF_reg_reg(regF dst, regF src) %{ 2408 predicate(VM_Version::supports_avxonly()); 2409 match(Set dst (AbsF src)); 2410 ins_cost(150); 2411 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2412 ins_encode %{ 2413 int vector_len = 0; 2414 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2415 ExternalAddress(float_signmask()), vector_len); 2416 %} 2417 ins_pipe(pipe_slow); 2418 %} 2419 2420 #ifdef _LP64 2421 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2422 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2423 match(Set dst (AbsF src)); 2424 ins_cost(150); 2425 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2426 ins_encode %{ 2427 int vector_len = 0; 2428 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2429 ExternalAddress(float_signmask()), vector_len); 2430 %} 2431 ins_pipe(pipe_slow); 2432 %} 2433 2434 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ 2435 predicate(VM_Version::supports_avx512novl()); 2436 match(Set dst (AbsF src1)); 2437 effect(TEMP src2); 2438 ins_cost(150); 2439 format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} 2440 ins_encode %{ 2441 int vector_len = 0; 2442 __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2443 ExternalAddress(float_signmask()), vector_len); 2444 %} 2445 ins_pipe(pipe_slow); 2446 %} 2447 #else // _LP64 2448 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2449 predicate(UseAVX > 2); 2450 match(Set dst (AbsF src)); 2451 ins_cost(150); 2452 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2453 ins_encode %{ 2454 int vector_len = 0; 2455 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2456 ExternalAddress(float_signmask()), vector_len); 2457 %} 2458 ins_pipe(pipe_slow); 2459 %} 2460 #endif 2461 2462 instruct absD_reg(regD dst) %{ 2463 predicate((UseSSE>=2) && (UseAVX == 0)); 2464 match(Set dst (AbsD dst)); 2465 ins_cost(150); 2466 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2467 "# abs double by sign masking" %} 2468 ins_encode %{ 2469 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2470 %} 2471 ins_pipe(pipe_slow); 2472 %} 2473 2474 instruct absD_reg_reg(regD dst, regD src) %{ 2475 predicate(VM_Version::supports_avxonly()); 2476 match(Set dst (AbsD src)); 2477 ins_cost(150); 2478 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2479 "# abs double by sign masking" %} 2480 ins_encode %{ 2481 int vector_len = 0; 2482 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2483 ExternalAddress(double_signmask()), vector_len); 2484 %} 2485 ins_pipe(pipe_slow); 2486 %} 2487 2488 #ifdef _LP64 2489 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2490 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2491 match(Set dst (AbsD src)); 2492 ins_cost(150); 2493 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2494 "# abs double by sign masking" %} 2495 ins_encode %{ 2496 int vector_len = 0; 2497 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2498 ExternalAddress(double_signmask()), vector_len); 2499 %} 2500 ins_pipe(pipe_slow); 2501 %} 2502 2503 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ 2504 predicate(VM_Version::supports_avx512novl()); 2505 match(Set dst (AbsD src1)); 2506 effect(TEMP src2); 2507 ins_cost(150); 2508 format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double
by sign masking" %} 2509 ins_encode %{ 2510 int vector_len = 0; 2511 __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2512 ExternalAddress(double_signmask()), vector_len); 2513 %} 2514 ins_pipe(pipe_slow); 2515 %} 2516 #else // _LP64 2517 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2518 predicate(UseAVX > 2); 2519 match(Set dst (AbsD src)); 2520 ins_cost(150); 2521 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2522 "# abs double by sign masking" %} 2523 ins_encode %{ 2524 int vector_len = 0; 2525 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2526 ExternalAddress(double_signmask()), vector_len); 2527 %} 2528 ins_pipe(pipe_slow); 2529 %} 2530 #endif 2531 2532 instruct negF_reg(regF dst) %{ 2533 predicate((UseSSE>=1) && (UseAVX == 0)); 2534 match(Set dst (NegF dst)); 2535 ins_cost(150); 2536 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2537 ins_encode %{ 2538 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2539 %} 2540 ins_pipe(pipe_slow); 2541 %} 2542 2543 instruct negF_reg_reg(regF dst, regF src) %{ 2544 predicate(UseAVX > 0); 2545 match(Set dst (NegF src)); 2546 ins_cost(150); 2547 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2548 ins_encode %{ 2549 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2550 ExternalAddress(float_signflip())); 2551 %} 2552 ins_pipe(pipe_slow); 2553 %} 2554 2555 instruct negD_reg(regD dst) %{ 2556 predicate((UseSSE>=2) && (UseAVX == 0)); 2557 match(Set dst (NegD dst)); 2558 ins_cost(150); 2559 format %{ "xorpd $dst, [0x8000000000000000]\t" 2560 "# neg double by sign flipping" %} 2561 ins_encode %{ 2562 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2563 %} 2564 ins_pipe(pipe_slow); 2565 %} 2566 2567 instruct negD_reg_reg(regD dst, regD src) %{ 2568 predicate(UseAVX > 0); 2569 match(Set dst (NegD src)); 2570 ins_cost(150); 2571 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 2572 "# neg double by sign flipping" %} 2573 ins_encode %{ 2574 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2575 ExternalAddress(double_signflip())); 2576 %} 2577 ins_pipe(pipe_slow); 2578 %} 2579 2580 instruct sqrtF_reg(regF dst, regF src) %{ 2581 predicate(UseSSE>=1); 2582 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 2583 2584 format %{ "sqrtss $dst, $src" %} 2585 ins_cost(150); 2586 ins_encode %{ 2587 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2588 %} 2589 ins_pipe(pipe_slow); 2590 %} 2591 2592 instruct sqrtF_mem(regF dst, memory src) %{ 2593 predicate(UseSSE>=1); 2594 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 2595 2596 format %{ "sqrtss $dst, $src" %} 2597 ins_cost(150); 2598 ins_encode %{ 2599 __ sqrtss($dst$$XMMRegister, $src$$Address); 2600 %} 2601 ins_pipe(pipe_slow); 2602 %} 2603 2604 instruct sqrtF_imm(regF dst, immF con) %{ 2605 predicate(UseSSE>=1); 2606 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 2607 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2608 ins_cost(150); 2609 ins_encode %{ 2610 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 2611 %} 2612 ins_pipe(pipe_slow); 2613 %} 2614 2615 instruct sqrtD_reg(regD dst, regD src) %{ 2616 predicate(UseSSE>=2); 2617 match(Set dst (SqrtD src)); 2618 2619 format %{ "sqrtsd $dst, $src" %} 2620 ins_cost(150); 2621 ins_encode %{ 2622 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2623 %} 2624 ins_pipe(pipe_slow); 2625 %} 2626 2627 instruct sqrtD_mem(regD dst, memory src) %{ 2628 predicate(UseSSE>=2); 2629 match(Set dst
(SqrtD (LoadD src))); 2630 2631 format %{ "sqrtsd $dst, $src" %} 2632 ins_cost(150); 2633 ins_encode %{ 2634 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2635 %} 2636 ins_pipe(pipe_slow); 2637 %} 2638 2639 instruct sqrtD_imm(regD dst, immD con) %{ 2640 predicate(UseSSE>=2); 2641 match(Set dst (SqrtD con)); 2642 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2643 ins_cost(150); 2644 ins_encode %{ 2645 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2646 %} 2647 ins_pipe(pipe_slow); 2648 %} 2649 2650 instruct onspinwait() %{ 2651 match(OnSpinWait); 2652 ins_cost(200); 2653 2654 format %{ 2655 $$template 2656 if (os::is_MP()) { 2657 $$emit$$"pause\t! membar_onspinwait" 2658 } else { 2659 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2660 } 2661 %} 2662 ins_encode %{ 2663 __ pause(); 2664 %} 2665 ins_pipe(pipe_slow); 2666 %} 2667 2668 // a * b + c 2669 instruct fmaD_reg(regD a, regD b, regD c) %{ 2670 predicate(UseFMA); 2671 match(Set c (FmaD c (Binary a b))); 2672 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2673 ins_cost(150); 2674 ins_encode %{ 2675 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2676 %} 2677 ins_pipe( pipe_slow ); 2678 %} 2679 2680 // a * b + c 2681 instruct fmaF_reg(regF a, regF b, regF c) %{ 2682 predicate(UseFMA); 2683 match(Set c (FmaF c (Binary a b))); 2684 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2685 ins_cost(150); 2686 ins_encode %{ 2687 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2688 %} 2689 ins_pipe( pipe_slow ); 2690 %} 2691 2692 // ====================VECTOR INSTRUCTIONS===================================== 2693 2694 // Load vectors (4 bytes long) 2695 instruct loadV4(vecS dst, memory mem) %{ 2696 predicate(n->as_LoadVector()->memory_size() == 4); 2697 match(Set dst (LoadVector mem)); 2698 ins_cost(125); 2699 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2700 ins_encode %{ 2701 __ movdl($dst$$XMMRegister, $mem$$Address); 2702 %} 2703 ins_pipe( pipe_slow ); 2704 %} 2705 2706 // Load vectors (8 bytes long) 2707 instruct loadV8(vecD dst, memory mem) %{ 2708 predicate(n->as_LoadVector()->memory_size() == 8); 2709 match(Set dst (LoadVector mem)); 2710 ins_cost(125); 2711 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2712 ins_encode %{ 2713 __ movq($dst$$XMMRegister, $mem$$Address); 2714 %} 2715 ins_pipe( pipe_slow ); 2716 %} 2717 2718 // Load vectors (16 bytes long) 2719 instruct loadV16(vecX dst, memory mem) %{ 2720 predicate(n->as_LoadVector()->memory_size() == 16); 2721 match(Set dst (LoadVector mem)); 2722 ins_cost(125); 2723 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2724 ins_encode %{ 2725 __ movdqu($dst$$XMMRegister, $mem$$Address); 2726 %} 2727 ins_pipe( pipe_slow ); 2728 %} 2729 2730 // Load vectors (32 bytes long) 2731 instruct loadV32(vecY dst, memory mem) %{ 2732 predicate(n->as_LoadVector()->memory_size() == 32); 2733 match(Set dst (LoadVector mem)); 2734 ins_cost(125); 2735 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2736 ins_encode %{ 2737 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2738 %} 2739 ins_pipe( pipe_slow ); 2740 %} 2741 2742 // Load vectors (64 bytes long) 2743 instruct loadV64_dword(vecZ dst, memory mem) %{ 2744 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 2745 match(Set dst (LoadVector mem)); 2746 ins_cost(125); 2747 format %{ "vmovdqul $dst k0,$mem\t! 
load vector (64 bytes)" %} 2748 ins_encode %{ 2749 int vector_len = 2; 2750 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 2751 %} 2752 ins_pipe( pipe_slow ); 2753 %} 2754 2755 // Load vectors (64 bytes long) 2756 instruct loadV64_qword(vecZ dst, memory mem) %{ 2757 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 2758 match(Set dst (LoadVector mem)); 2759 ins_cost(125); 2760 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 2761 ins_encode %{ 2762 int vector_len = 2; 2763 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 2764 %} 2765 ins_pipe( pipe_slow ); 2766 %} 2767 2768 // Store vectors 2769 instruct storeV4(memory mem, vecS src) %{ 2770 predicate(n->as_StoreVector()->memory_size() == 4); 2771 match(Set mem (StoreVector mem src)); 2772 ins_cost(145); 2773 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2774 ins_encode %{ 2775 __ movdl($mem$$Address, $src$$XMMRegister); 2776 %} 2777 ins_pipe( pipe_slow ); 2778 %} 2779 2780 instruct storeV8(memory mem, vecD src) %{ 2781 predicate(n->as_StoreVector()->memory_size() == 8); 2782 match(Set mem (StoreVector mem src)); 2783 ins_cost(145); 2784 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 2785 ins_encode %{ 2786 __ movq($mem$$Address, $src$$XMMRegister); 2787 %} 2788 ins_pipe( pipe_slow ); 2789 %} 2790 2791 instruct storeV16(memory mem, vecX src) %{ 2792 predicate(n->as_StoreVector()->memory_size() == 16); 2793 match(Set mem (StoreVector mem src)); 2794 ins_cost(145); 2795 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 2796 ins_encode %{ 2797 __ movdqu($mem$$Address, $src$$XMMRegister); 2798 %} 2799 ins_pipe( pipe_slow ); 2800 %} 2801 2802 instruct storeV32(memory mem, vecY src) %{ 2803 predicate(n->as_StoreVector()->memory_size() == 32); 2804 match(Set mem (StoreVector mem src)); 2805 ins_cost(145); 2806 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2807 ins_encode %{ 2808 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2809 %} 2810 ins_pipe( pipe_slow ); 2811 %} 2812 2813 instruct storeV64_dword(memory mem, vecZ src) %{ 2814 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 2815 match(Set mem (StoreVector mem src)); 2816 ins_cost(145); 2817 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 2818 ins_encode %{ 2819 int vector_len = 2; 2820 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 2821 %} 2822 ins_pipe( pipe_slow ); 2823 %} 2824 2825 instruct storeV64_qword(memory mem, vecZ src) %{ 2826 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 2827 match(Set mem (StoreVector mem src)); 2828 ins_cost(145); 2829 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 2830 ins_encode %{ 2831 int vector_len = 2; 2832 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 2833 %} 2834 ins_pipe( pipe_slow ); 2835 %} 2836 2837 // ====================LEGACY REPLICATE======================================= 2838 2839 instruct Repl4B_mem(vecS dst, memory mem) %{ 2840 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2841 match(Set dst (ReplicateB (LoadB mem))); 2842 format %{ "punpcklbw $dst,$mem\n\t" 2843 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 2844 ins_encode %{ 2845 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2846 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2847 %} 2848 ins_pipe( pipe_slow ); 2849 %} 2850 2851 instruct Repl8B_mem(vecD dst, memory mem) %{ 2852 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2853 match(Set dst (ReplicateB (LoadB mem))); 2854 format %{ "punpcklbw $dst,$mem\n\t" 2855 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 2856 ins_encode %{ 2857 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2858 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2859 %} 2860 ins_pipe( pipe_slow ); 2861 %} 2862 2863 instruct Repl16B(vecX dst, rRegI src) %{ 2864 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2865 match(Set dst (ReplicateB src)); 2866 format %{ "movd $dst,$src\n\t" 2867 "punpcklbw $dst,$dst\n\t" 2868 "pshuflw $dst,$dst,0x00\n\t" 2869 "punpcklqdq $dst,$dst\t! replicate16B" %} 2870 ins_encode %{ 2871 __ movdl($dst$$XMMRegister, $src$$Register); 2872 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2873 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2874 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2875 %} 2876 ins_pipe( pipe_slow ); 2877 %} 2878 2879 instruct Repl16B_mem(vecX dst, memory mem) %{ 2880 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2881 match(Set dst (ReplicateB (LoadB mem))); 2882 format %{ "punpcklbw $dst,$mem\n\t" 2883 "pshuflw $dst,$dst,0x00\n\t" 2884 "punpcklqdq $dst,$dst\t! replicate16B" %} 2885 ins_encode %{ 2886 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2887 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2888 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2889 %} 2890 ins_pipe( pipe_slow ); 2891 %} 2892 2893 instruct Repl32B(vecY dst, rRegI src) %{ 2894 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2895 match(Set dst (ReplicateB src)); 2896 format %{ "movd $dst,$src\n\t" 2897 "punpcklbw $dst,$dst\n\t" 2898 "pshuflw $dst,$dst,0x00\n\t" 2899 "punpcklqdq $dst,$dst\n\t" 2900 "vinserti128_high $dst,$dst\t! replicate32B" %} 2901 ins_encode %{ 2902 __ movdl($dst$$XMMRegister, $src$$Register); 2903 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2904 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2905 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2906 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2907 %} 2908 ins_pipe( pipe_slow ); 2909 %} 2910 2911 instruct Repl32B_mem(vecY dst, memory mem) %{ 2912 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2913 match(Set dst (ReplicateB (LoadB mem))); 2914 format %{ "punpcklbw $dst,$mem\n\t" 2915 "pshuflw $dst,$dst,0x00\n\t" 2916 "punpcklqdq $dst,$dst\n\t" 2917 "vinserti128_high $dst,$dst\t! replicate32B" %} 2918 ins_encode %{ 2919 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2920 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2921 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2922 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2923 %} 2924 ins_pipe( pipe_slow ); 2925 %} 2926 2927 instruct Repl16B_imm(vecX dst, immI con) %{ 2928 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2929 match(Set dst (ReplicateB con)); 2930 format %{ "movq $dst,[$constantaddress]\n\t" 2931 "punpcklqdq $dst,$dst\t! 
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
replicate16S" %} 3020 ins_encode %{ 3021 __ movdl($dst$$XMMRegister, $src$$Register); 3022 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3023 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3024 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3025 %} 3026 ins_pipe( pipe_slow ); 3027 %} 3028 3029 instruct Repl16S_mem(vecY dst, memory mem) %{ 3030 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3031 match(Set dst (ReplicateS (LoadS mem))); 3032 format %{ "pshuflw $dst,$mem,0x00\n\t" 3033 "punpcklqdq $dst,$dst\n\t" 3034 "vinserti128_high $dst,$dst\t! replicate16S" %} 3035 ins_encode %{ 3036 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3037 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3038 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3039 %} 3040 ins_pipe( pipe_slow ); 3041 %} 3042 3043 instruct Repl16S_imm(vecY dst, immI con) %{ 3044 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3045 match(Set dst (ReplicateS con)); 3046 format %{ "movq $dst,[$constantaddress]\n\t" 3047 "punpcklqdq $dst,$dst\n\t" 3048 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3049 ins_encode %{ 3050 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3051 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3052 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3053 %} 3054 ins_pipe( pipe_slow ); 3055 %} 3056 3057 instruct Repl4I(vecX dst, rRegI src) %{ 3058 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3059 match(Set dst (ReplicateI src)); 3060 format %{ "movd $dst,$src\n\t" 3061 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3062 ins_encode %{ 3063 __ movdl($dst$$XMMRegister, $src$$Register); 3064 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3065 %} 3066 ins_pipe( pipe_slow ); 3067 %} 3068 3069 instruct Repl4I_mem(vecX dst, memory mem) %{ 3070 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3071 match(Set dst (ReplicateI (LoadI mem))); 3072 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3073 ins_encode %{ 3074 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3075 %} 3076 ins_pipe( pipe_slow ); 3077 %} 3078 3079 instruct Repl8I(vecY dst, rRegI src) %{ 3080 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3081 match(Set dst (ReplicateI src)); 3082 format %{ "movd $dst,$src\n\t" 3083 "pshufd $dst,$dst,0x00\n\t" 3084 "vinserti128_high $dst,$dst\t! replicate8I" %} 3085 ins_encode %{ 3086 __ movdl($dst$$XMMRegister, $src$$Register); 3087 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3088 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3089 %} 3090 ins_pipe( pipe_slow ); 3091 %} 3092 3093 instruct Repl8I_mem(vecY dst, memory mem) %{ 3094 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3095 match(Set dst (ReplicateI (LoadI mem))); 3096 format %{ "pshufd $dst,$mem,0x00\n\t" 3097 "vinserti128_high $dst,$dst\t! replicate8I" %} 3098 ins_encode %{ 3099 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3100 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3101 %} 3102 ins_pipe( pipe_slow ); 3103 %} 3104 3105 instruct Repl4I_imm(vecX dst, immI con) %{ 3106 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3107 match(Set dst (ReplicateI con)); 3108 format %{ "movq $dst,[$constantaddress]\t! 
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// A long can be loaded into an XMM register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================
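
// The generic replicate patterns below are guarded only by vector length, so
// they apply at every supported SSE/AVX level and cover the small-vector and
// zeroing cases shared by the legacy and EVEX configurations above and below.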

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX the vpxor macro emits vxorpd instead.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}
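
// A worked example of the replicate4_imm/replicate8_imm helpers used by the
// _imm patterns above (illustrative values, not taken from this file): the
// low 'width' bytes of the immediate are masked off and doubled up until 4
// or 8 bytes are filled, e.g.
//   replicate4_imm(0x41, 1)   -> 0x41414141
//   replicate8_imm(0x1234, 2) -> 0x1234123412341234
// so the constant-table slot already holds the fully replicated pattern and
// the instruct only has to load it (and punpcklqdq it for wider vectors).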

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; on plain AVX the vpxor macro emits vxorpd instead.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// An integer can be loaded into an XMM register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
replicate2I" %} 3553 ins_encode %{ 3554 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3555 %} 3556 ins_pipe( fpu_reg_reg ); 3557 %} 3558 3559 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 3560 predicate(n->as_Vector()->length() == 4); 3561 match(Set dst (ReplicateI zero)); 3562 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 3563 ins_encode %{ 3564 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3565 %} 3566 ins_pipe( fpu_reg_reg ); 3567 %} 3568 3569 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 3570 predicate(n->as_Vector()->length() == 8); 3571 match(Set dst (ReplicateI zero)); 3572 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 3573 ins_encode %{ 3574 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3575 int vector_len = 1; 3576 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3577 %} 3578 ins_pipe( fpu_reg_reg ); 3579 %} 3580 3581 // Replicate long (8 byte) scalar to be vector 3582 #ifdef _LP64 3583 instruct Repl2L(vecX dst, rRegL src) %{ 3584 predicate(n->as_Vector()->length() == 2); 3585 match(Set dst (ReplicateL src)); 3586 format %{ "movdq $dst,$src\n\t" 3587 "punpcklqdq $dst,$dst\t! replicate2L" %} 3588 ins_encode %{ 3589 __ movdq($dst$$XMMRegister, $src$$Register); 3590 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3591 %} 3592 ins_pipe( pipe_slow ); 3593 %} 3594 #else // _LP64 3595 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 3596 predicate(n->as_Vector()->length() == 2); 3597 match(Set dst (ReplicateL src)); 3598 effect(TEMP dst, USE src, TEMP tmp); 3599 format %{ "movdl $dst,$src.lo\n\t" 3600 "movdl $tmp,$src.hi\n\t" 3601 "punpckldq $dst,$tmp\n\t" 3602 "punpcklqdq $dst,$dst\t! replicate2L"%} 3603 ins_encode %{ 3604 __ movdl($dst$$XMMRegister, $src$$Register); 3605 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3606 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3607 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3608 %} 3609 ins_pipe( pipe_slow ); 3610 %} 3611 #endif // _LP64 3612 3613 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3614 instruct Repl2L_imm(vecX dst, immL con) %{ 3615 predicate(n->as_Vector()->length() == 2); 3616 match(Set dst (ReplicateL con)); 3617 format %{ "movq $dst,[$constantaddress]\n\t" 3618 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 3619 ins_encode %{ 3620 __ movq($dst$$XMMRegister, $constantaddress($con)); 3621 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3622 %} 3623 ins_pipe( pipe_slow ); 3624 %} 3625 3626 // Replicate long (8 byte) scalar zero to be vector 3627 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3628 predicate(n->as_Vector()->length() == 2); 3629 match(Set dst (ReplicateL zero)); 3630 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3631 ins_encode %{ 3632 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3633 %} 3634 ins_pipe( fpu_reg_reg ); 3635 %} 3636 3637 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3638 predicate(n->as_Vector()->length() == 4); 3639 match(Set dst (ReplicateL zero)); 3640 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 3641 ins_encode %{ 3642 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================
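
// The EVEX patterns below use vpbroadcastb/w/d/q and vbroadcastss/sd.
// Throughout this file the vector_len argument selects the operand width:
// 0 = 128-bit, 1 = 256-bit, 2 = 512-bit. The byte/short broadcasts require
// AVX512BW (plus AVX512VL below 512 bits), which is what the
// supports_avx512vlbw() and supports_avx512bw() predicates check, and
// broadcasting directly from a general-purpose register is likewise an
// AVX-512 feature.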
replicate32B" %} 3742 ins_encode %{ 3743 int vector_len = 1; 3744 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3745 %} 3746 ins_pipe( pipe_slow ); 3747 %} 3748 3749 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3750 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3751 match(Set dst (ReplicateB src)); 3752 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 3753 ins_encode %{ 3754 int vector_len = 2; 3755 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3756 %} 3757 ins_pipe( pipe_slow ); 3758 %} 3759 3760 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3761 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3762 match(Set dst (ReplicateB (LoadB mem))); 3763 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3764 ins_encode %{ 3765 int vector_len = 2; 3766 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3767 %} 3768 ins_pipe( pipe_slow ); 3769 %} 3770 3771 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3772 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3773 match(Set dst (ReplicateB con)); 3774 format %{ "movq $dst,[$constantaddress]\n\t" 3775 "vpbroadcastb $dst,$dst\t! replicate16B" %} 3776 ins_encode %{ 3777 int vector_len = 0; 3778 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3779 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3780 %} 3781 ins_pipe( pipe_slow ); 3782 %} 3783 3784 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3785 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3786 match(Set dst (ReplicateB con)); 3787 format %{ "movq $dst,[$constantaddress]\n\t" 3788 "vpbroadcastb $dst,$dst\t! replicate32B" %} 3789 ins_encode %{ 3790 int vector_len = 1; 3791 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3792 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3793 %} 3794 ins_pipe( pipe_slow ); 3795 %} 3796 3797 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 3798 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3799 match(Set dst (ReplicateB con)); 3800 format %{ "movq $dst,[$constantaddress]\n\t" 3801 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 3802 ins_encode %{ 3803 int vector_len = 2; 3804 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3805 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3806 %} 3807 ins_pipe( pipe_slow ); 3808 %} 3809 3810 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 3811 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3812 match(Set dst (ReplicateB zero)); 3813 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 3814 ins_encode %{ 3815 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3816 int vector_len = 2; 3817 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3818 %} 3819 ins_pipe( fpu_reg_reg ); 3820 %} 3821 3822 instruct Repl4S_evex(vecD dst, rRegI src) %{ 3823 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3824 match(Set dst (ReplicateS src)); 3825 format %{ "vpbroadcastw $dst,$src\t! 
replicate4S" %} 3826 ins_encode %{ 3827 int vector_len = 0; 3828 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3829 %} 3830 ins_pipe( pipe_slow ); 3831 %} 3832 3833 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 3834 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3835 match(Set dst (ReplicateS (LoadS mem))); 3836 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 3837 ins_encode %{ 3838 int vector_len = 0; 3839 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3840 %} 3841 ins_pipe( pipe_slow ); 3842 %} 3843 3844 instruct Repl8S_evex(vecX dst, rRegI src) %{ 3845 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3846 match(Set dst (ReplicateS src)); 3847 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 3848 ins_encode %{ 3849 int vector_len = 0; 3850 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3851 %} 3852 ins_pipe( pipe_slow ); 3853 %} 3854 3855 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 3856 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3857 match(Set dst (ReplicateS (LoadS mem))); 3858 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 3859 ins_encode %{ 3860 int vector_len = 0; 3861 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3862 %} 3863 ins_pipe( pipe_slow ); 3864 %} 3865 3866 instruct Repl16S_evex(vecY dst, rRegI src) %{ 3867 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3868 match(Set dst (ReplicateS src)); 3869 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 3870 ins_encode %{ 3871 int vector_len = 1; 3872 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3873 %} 3874 ins_pipe( pipe_slow ); 3875 %} 3876 3877 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 3878 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3879 match(Set dst (ReplicateS (LoadS mem))); 3880 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 3881 ins_encode %{ 3882 int vector_len = 1; 3883 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3884 %} 3885 ins_pipe( pipe_slow ); 3886 %} 3887 3888 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 3889 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3890 match(Set dst (ReplicateS src)); 3891 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 3892 ins_encode %{ 3893 int vector_len = 2; 3894 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3895 %} 3896 ins_pipe( pipe_slow ); 3897 %} 3898 3899 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 3900 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3901 match(Set dst (ReplicateS (LoadS mem))); 3902 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 3903 ins_encode %{ 3904 int vector_len = 2; 3905 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3906 %} 3907 ins_pipe( pipe_slow ); 3908 %} 3909 3910 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 3911 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3912 match(Set dst (ReplicateS con)); 3913 format %{ "movq $dst,[$constantaddress]\n\t" 3914 "vpbroadcastw $dst,$dst\t! 
replicate8S" %} 3915 ins_encode %{ 3916 int vector_len = 0; 3917 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3918 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3919 %} 3920 ins_pipe( pipe_slow ); 3921 %} 3922 3923 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 3924 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3925 match(Set dst (ReplicateS con)); 3926 format %{ "movq $dst,[$constantaddress]\n\t" 3927 "vpbroadcastw $dst,$dst\t! replicate16S" %} 3928 ins_encode %{ 3929 int vector_len = 1; 3930 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3931 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3932 %} 3933 ins_pipe( pipe_slow ); 3934 %} 3935 3936 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 3937 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 3938 match(Set dst (ReplicateS con)); 3939 format %{ "movq $dst,[$constantaddress]\n\t" 3940 "vpbroadcastw $dst,$dst\t! replicate32S" %} 3941 ins_encode %{ 3942 int vector_len = 2; 3943 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3944 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3945 %} 3946 ins_pipe( pipe_slow ); 3947 %} 3948 3949 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 3950 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 3951 match(Set dst (ReplicateS zero)); 3952 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 3953 ins_encode %{ 3954 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3955 int vector_len = 2; 3956 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3957 %} 3958 ins_pipe( fpu_reg_reg ); 3959 %} 3960 3961 instruct Repl4I_evex(vecX dst, rRegI src) %{ 3962 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 3963 match(Set dst (ReplicateI src)); 3964 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 3965 ins_encode %{ 3966 int vector_len = 0; 3967 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 3968 %} 3969 ins_pipe( pipe_slow ); 3970 %} 3971 3972 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 3973 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 3974 match(Set dst (ReplicateI (LoadI mem))); 3975 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 3976 ins_encode %{ 3977 int vector_len = 0; 3978 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 3979 %} 3980 ins_pipe( pipe_slow ); 3981 %} 3982 3983 instruct Repl8I_evex(vecY dst, rRegI src) %{ 3984 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 3985 match(Set dst (ReplicateI src)); 3986 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 3987 ins_encode %{ 3988 int vector_len = 1; 3989 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 3990 %} 3991 ins_pipe( pipe_slow ); 3992 %} 3993 3994 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 3995 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 3996 match(Set dst (ReplicateI (LoadI mem))); 3997 format %{ "vpbroadcastd $dst,$mem\t! 
replicate8I" %} 3998 ins_encode %{ 3999 int vector_len = 1; 4000 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4001 %} 4002 ins_pipe( pipe_slow ); 4003 %} 4004 4005 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4006 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4007 match(Set dst (ReplicateI src)); 4008 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4009 ins_encode %{ 4010 int vector_len = 2; 4011 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4012 %} 4013 ins_pipe( pipe_slow ); 4014 %} 4015 4016 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4017 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4018 match(Set dst (ReplicateI (LoadI mem))); 4019 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4020 ins_encode %{ 4021 int vector_len = 2; 4022 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4023 %} 4024 ins_pipe( pipe_slow ); 4025 %} 4026 4027 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4028 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4029 match(Set dst (ReplicateI con)); 4030 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4031 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4032 ins_encode %{ 4033 int vector_len = 0; 4034 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4035 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4036 %} 4037 ins_pipe( pipe_slow ); 4038 %} 4039 4040 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4041 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4042 match(Set dst (ReplicateI con)); 4043 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4044 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4045 ins_encode %{ 4046 int vector_len = 1; 4047 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4048 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4049 %} 4050 ins_pipe( pipe_slow ); 4051 %} 4052 4053 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4054 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4055 match(Set dst (ReplicateI con)); 4056 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4057 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4058 ins_encode %{ 4059 int vector_len = 2; 4060 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4061 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4062 %} 4063 ins_pipe( pipe_slow ); 4064 %} 4065 4066 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4067 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4068 match(Set dst (ReplicateI zero)); 4069 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4070 ins_encode %{ 4071 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4072 int vector_len = 2; 4073 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4074 %} 4075 ins_pipe( fpu_reg_reg ); 4076 %} 4077 4078 // Replicate long (8 byte) scalar to be vector 4079 #ifdef _LP64 4080 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4081 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4082 match(Set dst (ReplicateL src)); 4083 format %{ "vpbroadcastq $dst,$src\t! 
replicate4L" %} 4084 ins_encode %{ 4085 int vector_len = 1; 4086 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4087 %} 4088 ins_pipe( pipe_slow ); 4089 %} 4090 4091 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4092 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4093 match(Set dst (ReplicateL src)); 4094 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4095 ins_encode %{ 4096 int vector_len = 2; 4097 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4098 %} 4099 ins_pipe( pipe_slow ); 4100 %} 4101 #else // _LP64 4102 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4103 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4104 match(Set dst (ReplicateL src)); 4105 effect(TEMP dst, USE src, TEMP tmp); 4106 format %{ "movdl $dst,$src.lo\n\t" 4107 "movdl $tmp,$src.hi\n\t" 4108 "punpckldq $dst,$tmp\n\t" 4109 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4110 ins_encode %{ 4111 int vector_len = 1; 4112 __ movdl($dst$$XMMRegister, $src$$Register); 4113 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4114 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4115 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4116 %} 4117 ins_pipe( pipe_slow ); 4118 %} 4119 4120 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4121 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4122 match(Set dst (ReplicateL src)); 4123 effect(TEMP dst, USE src, TEMP tmp); 4124 format %{ "movdl $dst,$src.lo\n\t" 4125 "movdl $tmp,$src.hi\n\t" 4126 "punpckldq $dst,$tmp\n\t" 4127 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4128 ins_encode %{ 4129 int vector_len = 2; 4130 __ movdl($dst$$XMMRegister, $src$$Register); 4131 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4132 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4133 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4134 %} 4135 ins_pipe( pipe_slow ); 4136 %} 4137 #endif // _LP64 4138 4139 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4140 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4141 match(Set dst (ReplicateL con)); 4142 format %{ "movq $dst,[$constantaddress]\n\t" 4143 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4144 ins_encode %{ 4145 int vector_len = 1; 4146 __ movq($dst$$XMMRegister, $constantaddress($con)); 4147 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4148 %} 4149 ins_pipe( pipe_slow ); 4150 %} 4151 4152 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4153 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4154 match(Set dst (ReplicateL con)); 4155 format %{ "movq $dst,[$constantaddress]\n\t" 4156 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4157 ins_encode %{ 4158 int vector_len = 2; 4159 __ movq($dst$$XMMRegister, $constantaddress($con)); 4160 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4161 %} 4162 ins_pipe( pipe_slow ); 4163 %} 4164 4165 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4166 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4167 match(Set dst (ReplicateL (LoadL mem))); 4168 format %{ "vpbroadcastd $dst,$mem\t! 
replicate2L" %} 4169 ins_encode %{ 4170 int vector_len = 0; 4171 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4172 %} 4173 ins_pipe( pipe_slow ); 4174 %} 4175 4176 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4177 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4178 match(Set dst (ReplicateL (LoadL mem))); 4179 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4180 ins_encode %{ 4181 int vector_len = 1; 4182 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4183 %} 4184 ins_pipe( pipe_slow ); 4185 %} 4186 4187 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4188 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4189 match(Set dst (ReplicateL (LoadL mem))); 4190 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4191 ins_encode %{ 4192 int vector_len = 2; 4193 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4194 %} 4195 ins_pipe( pipe_slow ); 4196 %} 4197 4198 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4199 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4200 match(Set dst (ReplicateL zero)); 4201 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4202 ins_encode %{ 4203 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4204 int vector_len = 2; 4205 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4206 %} 4207 ins_pipe( fpu_reg_reg ); 4208 %} 4209 4210 instruct Repl8F_evex(vecY dst, regF src) %{ 4211 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4212 match(Set dst (ReplicateF src)); 4213 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4214 ins_encode %{ 4215 int vector_len = 1; 4216 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4217 %} 4218 ins_pipe( pipe_slow ); 4219 %} 4220 4221 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4222 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4223 match(Set dst (ReplicateF (LoadF mem))); 4224 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4225 ins_encode %{ 4226 int vector_len = 1; 4227 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4228 %} 4229 ins_pipe( pipe_slow ); 4230 %} 4231 4232 instruct Repl16F_evex(vecZ dst, regF src) %{ 4233 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4234 match(Set dst (ReplicateF src)); 4235 format %{ "vbroadcastss $dst,$src\t! replicate16F" %} 4236 ins_encode %{ 4237 int vector_len = 2; 4238 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4239 %} 4240 ins_pipe( pipe_slow ); 4241 %} 4242 4243 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4244 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4245 match(Set dst (ReplicateF (LoadF mem))); 4246 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4247 ins_encode %{ 4248 int vector_len = 2; 4249 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4250 %} 4251 ins_pipe( pipe_slow ); 4252 %} 4253 4254 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4255 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4256 match(Set dst (ReplicateF zero)); 4257 format %{ "vpxor $dst k0,$dst,$dst\t! 
  ins_encode %{
    // Use vpxor in place of vxorps: the EVEX encoding of vxorps has an AVX512DQ constraint, so this is done as a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: the EVEX encoding of vxorps has an AVX512DQ constraint, so this is done as a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: the EVEX encoding of vxorps has an AVX512DQ constraint, so this is done as a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps: the EVEX encoding of vxorps has an AVX512DQ constraint, so this is done as a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
replicate8D" %} 4339 ins_encode %{ 4340 int vector_len = 2; 4341 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4342 %} 4343 ins_pipe( pipe_slow ); 4344 %} 4345 4346 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4347 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4348 match(Set dst (ReplicateD zero)); 4349 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4350 ins_encode %{ 4351 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4352 int vector_len = 2; 4353 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4354 %} 4355 ins_pipe( fpu_reg_reg ); 4356 %} 4357 4358 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4359 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4360 match(Set dst (ReplicateD zero)); 4361 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4362 ins_encode %{ 4363 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4364 int vector_len = 2; 4365 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4366 %} 4367 ins_pipe( fpu_reg_reg ); 4368 %} 4369 4370 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4371 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4372 match(Set dst (ReplicateD zero)); 4373 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4374 ins_encode %{ 4375 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4376 int vector_len = 2; 4377 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4378 %} 4379 ins_pipe( fpu_reg_reg ); 4380 %} 4381 4382 // ====================REDUCTION ARITHMETIC======================================= 4383 4384 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4385 predicate(UseSSE > 2 && UseAVX == 0); 4386 match(Set dst (AddReductionVI src1 src2)); 4387 effect(TEMP tmp2, TEMP tmp); 4388 format %{ "movdqu $tmp2,$src2\n\t" 4389 "phaddd $tmp2,$tmp2\n\t" 4390 "movd $tmp,$src1\n\t" 4391 "paddd $tmp,$tmp2\n\t" 4392 "movd $dst,$tmp\t! add reduction2I" %} 4393 ins_encode %{ 4394 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4395 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4396 __ movdl($tmp$$XMMRegister, $src1$$Register); 4397 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4398 __ movdl($dst$$Register, $tmp$$XMMRegister); 4399 %} 4400 ins_pipe( pipe_slow ); 4401 %} 4402 4403 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4404 predicate(VM_Version::supports_avxonly()); 4405 match(Set dst (AddReductionVI src1 src2)); 4406 effect(TEMP tmp, TEMP tmp2); 4407 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4408 "movd $tmp2,$src1\n\t" 4409 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4410 "movd $dst,$tmp2\t! 

instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
add reduction4I" %} 4493 ins_encode %{ 4494 int vector_len = 0; 4495 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4496 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4497 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4498 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4499 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4500 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4501 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4502 %} 4503 ins_pipe( pipe_slow ); 4504 %} 4505 4506 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4507 predicate(VM_Version::supports_avxonly()); 4508 match(Set dst (AddReductionVI src1 src2)); 4509 effect(TEMP tmp, TEMP tmp2); 4510 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4511 "vphaddd $tmp,$tmp,$tmp2\n\t" 4512 "vextracti128_high $tmp2,$tmp\n\t" 4513 "vpaddd $tmp,$tmp,$tmp2\n\t" 4514 "movd $tmp2,$src1\n\t" 4515 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4516 "movd $dst,$tmp2\t! add reduction8I" %} 4517 ins_encode %{ 4518 int vector_len = 1; 4519 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4520 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4521 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4522 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4523 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4524 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4525 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4526 %} 4527 ins_pipe( pipe_slow ); 4528 %} 4529 4530 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4531 predicate(UseAVX > 2); 4532 match(Set dst (AddReductionVI src1 src2)); 4533 effect(TEMP tmp, TEMP tmp2); 4534 format %{ "vextracti128_high $tmp,$src2\n\t" 4535 "vpaddd $tmp,$tmp,$src2\n\t" 4536 "pshufd $tmp2,$tmp,0xE\n\t" 4537 "vpaddd $tmp,$tmp,$tmp2\n\t" 4538 "pshufd $tmp2,$tmp,0x1\n\t" 4539 "vpaddd $tmp,$tmp,$tmp2\n\t" 4540 "movd $tmp2,$src1\n\t" 4541 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4542 "movd $dst,$tmp2\t! add reduction8I" %} 4543 ins_encode %{ 4544 int vector_len = 0; 4545 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4546 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4547 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4548 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4549 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4550 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4551 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4552 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4553 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4554 %} 4555 ins_pipe( pipe_slow ); 4556 %} 4557 4558 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4559 predicate(UseAVX > 2); 4560 match(Set dst (AddReductionVI src1 src2)); 4561 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4562 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4563 "vpaddd $tmp3,$tmp3,$src2\n\t" 4564 "vextracti128_high $tmp,$tmp3\n\t" 4565 "vpaddd $tmp,$tmp,$tmp3\n\t" 4566 "pshufd $tmp2,$tmp,0xE\n\t" 4567 "vpaddd $tmp,$tmp,$tmp2\n\t" 4568 "pshufd $tmp2,$tmp,0x1\n\t" 4569 "vpaddd $tmp,$tmp,$tmp2\n\t" 4570 "movd $tmp2,$src1\n\t" 4571 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4572 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4573 ins_encode %{ 4574 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 4575 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4576 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 4577 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4578 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4579 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4580 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4581 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4582 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4583 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4584 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4585 %} 4586 ins_pipe( pipe_slow ); 4587 %} 4588 4589 #ifdef _LP64 4590 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4591 predicate(UseAVX > 2); 4592 match(Set dst (AddReductionVL src1 src2)); 4593 effect(TEMP tmp, TEMP tmp2); 4594 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4595 "vpaddq $tmp,$src2,$tmp2\n\t" 4596 "movdq $tmp2,$src1\n\t" 4597 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4598 "movdq $dst,$tmp2\t! add reduction2L" %} 4599 ins_encode %{ 4600 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4601 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4602 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4603 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4604 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4605 %} 4606 ins_pipe( pipe_slow ); 4607 %} 4608 4609 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4610 predicate(UseAVX > 2); 4611 match(Set dst (AddReductionVL src1 src2)); 4612 effect(TEMP tmp, TEMP tmp2); 4613 format %{ "vextracti128_high $tmp,$src2\n\t" 4614 "vpaddq $tmp2,$tmp,$src2\n\t" 4615 "pshufd $tmp,$tmp2,0xE\n\t" 4616 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4617 "movdq $tmp,$src1\n\t" 4618 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4619 "movdq $dst,$tmp2\t! add reduction4L" %} 4620 ins_encode %{ 4621 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4622 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4623 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4624 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4625 __ movdq($tmp$$XMMRegister, $src1$$Register); 4626 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4627 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4628 %} 4629 ins_pipe( pipe_slow ); 4630 %} 4631 4632 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4633 predicate(UseAVX > 2); 4634 match(Set dst (AddReductionVL src1 src2)); 4635 effect(TEMP tmp, TEMP tmp2); 4636 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 4637 "vpaddq $tmp2,$tmp2,$src2\n\t" 4638 "vextracti128_high $tmp,$tmp2\n\t" 4639 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4640 "pshufd $tmp,$tmp2,0xE\n\t" 4641 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4642 "movdq $tmp,$src1\n\t" 4643 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4644 "movdq $dst,$tmp2\t! 
add reduction8L" %} 4645 ins_encode %{ 4646 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4647 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4648 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 4649 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4650 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4651 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4652 __ movdq($tmp$$XMMRegister, $src1$$Register); 4653 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4654 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4655 %} 4656 ins_pipe( pipe_slow ); 4657 %} 4658 #endif 4659 4660 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4661 predicate(UseSSE >= 1 && UseAVX == 0); 4662 match(Set dst (AddReductionVF dst src2)); 4663 effect(TEMP dst, TEMP tmp); 4664 format %{ "addss $dst,$src2\n\t" 4665 "pshufd $tmp,$src2,0x01\n\t" 4666 "addss $dst,$tmp\t! add reduction2F" %} 4667 ins_encode %{ 4668 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4669 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4670 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4671 %} 4672 ins_pipe( pipe_slow ); 4673 %} 4674 4675 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4676 predicate(UseAVX > 0); 4677 match(Set dst (AddReductionVF dst src2)); 4678 effect(TEMP dst, TEMP tmp); 4679 format %{ "vaddss $dst,$dst,$src2\n\t" 4680 "pshufd $tmp,$src2,0x01\n\t" 4681 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 4682 ins_encode %{ 4683 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4684 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4685 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4686 %} 4687 ins_pipe( pipe_slow ); 4688 %} 4689 4690 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4691 predicate(UseSSE >= 1 && UseAVX == 0); 4692 match(Set dst (AddReductionVF dst src2)); 4693 effect(TEMP dst, TEMP tmp); 4694 format %{ "addss $dst,$src2\n\t" 4695 "pshufd $tmp,$src2,0x01\n\t" 4696 "addss $dst,$tmp\n\t" 4697 "pshufd $tmp,$src2,0x02\n\t" 4698 "addss $dst,$tmp\n\t" 4699 "pshufd $tmp,$src2,0x03\n\t" 4700 "addss $dst,$tmp\t! add reduction4F" %} 4701 ins_encode %{ 4702 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4703 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4704 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4705 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4706 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4707 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4708 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4709 %} 4710 ins_pipe( pipe_slow ); 4711 %} 4712 4713 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4714 predicate(UseAVX > 0); 4715 match(Set dst (AddReductionVF dst src2)); 4716 effect(TEMP tmp, TEMP dst); 4717 format %{ "vaddss $dst,dst,$src2\n\t" 4718 "pshufd $tmp,$src2,0x01\n\t" 4719 "vaddss $dst,$dst,$tmp\n\t" 4720 "pshufd $tmp,$src2,0x02\n\t" 4721 "vaddss $dst,$dst,$tmp\n\t" 4722 "pshufd $tmp,$src2,0x03\n\t" 4723 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 4724 ins_encode %{ 4725 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4726 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4727 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4728 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4729 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4730 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4731 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4732 %} 4733 ins_pipe( pipe_slow ); 4734 %} 4735 4736 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 4737 predicate(UseAVX > 0); 4738 match(Set dst (AddReductionVF dst src2)); 4739 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4740 format %{ "vaddss $dst,$dst,$src2\n\t" 4741 "pshufd $tmp,$src2,0x01\n\t" 4742 "vaddss $dst,$dst,$tmp\n\t" 4743 "pshufd $tmp,$src2,0x02\n\t" 4744 "vaddss $dst,$dst,$tmp\n\t" 4745 "pshufd $tmp,$src2,0x03\n\t" 4746 "vaddss $dst,$dst,$tmp\n\t" 4747 "vextractf128_high $tmp2,$src2\n\t" 4748 "vaddss $dst,$dst,$tmp2\n\t" 4749 "pshufd $tmp,$tmp2,0x01\n\t" 4750 "vaddss $dst,$dst,$tmp\n\t" 4751 "pshufd $tmp,$tmp2,0x02\n\t" 4752 "vaddss $dst,$dst,$tmp\n\t" 4753 "pshufd $tmp,$tmp2,0x03\n\t" 4754 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 4755 ins_encode %{ 4756 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4757 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4758 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4759 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4760 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4761 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4762 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4763 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4764 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4765 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4766 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4767 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4768 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4769 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4770 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4771 %} 4772 ins_pipe( pipe_slow ); 4773 %} 4774 4775 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 4776 predicate(UseAVX > 2); 4777 match(Set dst (AddReductionVF dst src2)); 4778 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4779 format %{ "vaddss $dst,$dst,$src2\n\t" 4780 "pshufd $tmp,$src2,0x01\n\t" 4781 "vaddss $dst,$dst,$tmp\n\t" 4782 "pshufd $tmp,$src2,0x02\n\t" 4783 "vaddss $dst,$dst,$tmp\n\t" 4784 "pshufd $tmp,$src2,0x03\n\t" 4785 "vaddss $dst,$dst,$tmp\n\t" 4786 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4787 "vaddss $dst,$dst,$tmp2\n\t" 4788 "pshufd $tmp,$tmp2,0x01\n\t" 4789 "vaddss $dst,$dst,$tmp\n\t" 4790 "pshufd $tmp,$tmp2,0x02\n\t" 4791 "vaddss $dst,$dst,$tmp\n\t" 4792 "pshufd $tmp,$tmp2,0x03\n\t" 4793 "vaddss $dst,$dst,$tmp\n\t" 4794 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4795 "vaddss $dst,$dst,$tmp2\n\t" 4796 "pshufd $tmp,$tmp2,0x01\n\t" 4797 "vaddss $dst,$dst,$tmp\n\t" 4798 "pshufd $tmp,$tmp2,0x02\n\t" 4799 "vaddss $dst,$dst,$tmp\n\t" 4800 "pshufd $tmp,$tmp2,0x03\n\t" 4801 "vaddss $dst,$dst,$tmp\n\t" 4802 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4803 "vaddss $dst,$dst,$tmp2\n\t" 4804 "pshufd $tmp,$tmp2,0x01\n\t" 4805 "vaddss $dst,$dst,$tmp\n\t" 4806 "pshufd 
$tmp,$tmp2,0x02\n\t" 4807 "vaddss $dst,$dst,$tmp\n\t" 4808 "pshufd $tmp,$tmp2,0x03\n\t" 4809 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 4810 ins_encode %{ 4811 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4812 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4813 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4814 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4815 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4816 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4817 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4818 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4819 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4820 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4821 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4822 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4823 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4824 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4825 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4826 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4827 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4828 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4829 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4830 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4831 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4832 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4833 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4834 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4835 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4836 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4837 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4838 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4839 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4840 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4841 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4842 %} 4843 ins_pipe( pipe_slow ); 4844 %} 4845 4846 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4847 predicate(UseSSE >= 1 && UseAVX == 0); 4848 match(Set dst (AddReductionVD dst src2)); 4849 effect(TEMP tmp, TEMP dst); 4850 format %{ "addsd $dst,$src2\n\t" 4851 "pshufd $tmp,$src2,0xE\n\t" 4852 "addsd $dst,$tmp\t! add reduction2D" %} 4853 ins_encode %{ 4854 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 4855 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4856 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 4857 %} 4858 ins_pipe( pipe_slow ); 4859 %} 4860 4861 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4862 predicate(UseAVX > 0); 4863 match(Set dst (AddReductionVD dst src2)); 4864 effect(TEMP tmp, TEMP dst); 4865 format %{ "vaddsd $dst,$dst,$src2\n\t" 4866 "pshufd $tmp,$src2,0xE\n\t" 4867 "vaddsd $dst,$dst,$tmp\t! 
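// pshufd's immediate selects source dword lanes for each destination lane,
// two bits per lane. The reductions use 0x01/0x02/0x03 to bring float
// lane 1, 2 or 3 down to lane 0, and 0xE (0b00001110) to bring the upper
// 64 bits (the high double, or float lanes 2-3) down low, so the scalar
// addss/addsd (and mulss/mulsd below) can reach every lane.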
add reduction2D" %} 4868 ins_encode %{ 4869 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4870 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4871 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4872 %} 4873 ins_pipe( pipe_slow ); 4874 %} 4875 4876 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 4877 predicate(UseAVX > 0); 4878 match(Set dst (AddReductionVD dst src2)); 4879 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4880 format %{ "vaddsd $dst,$dst,$src2\n\t" 4881 "pshufd $tmp,$src2,0xE\n\t" 4882 "vaddsd $dst,$dst,$tmp\n\t" 4883 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4884 "vaddsd $dst,$dst,$tmp2\n\t" 4885 "pshufd $tmp,$tmp2,0xE\n\t" 4886 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 4887 ins_encode %{ 4888 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4889 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4890 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4891 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4892 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4893 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4894 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4895 %} 4896 ins_pipe( pipe_slow ); 4897 %} 4898 4899 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 4900 predicate(UseAVX > 2); 4901 match(Set dst (AddReductionVD dst src2)); 4902 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4903 format %{ "vaddsd $dst,$dst,$src2\n\t" 4904 "pshufd $tmp,$src2,0xE\n\t" 4905 "vaddsd $dst,$dst,$tmp\n\t" 4906 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4907 "vaddsd $dst,$dst,$tmp2\n\t" 4908 "pshufd $tmp,$tmp2,0xE\n\t" 4909 "vaddsd $dst,$dst,$tmp\n\t" 4910 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4911 "vaddsd $dst,$dst,$tmp2\n\t" 4912 "pshufd $tmp,$tmp2,0xE\n\t" 4913 "vaddsd $dst,$dst,$tmp\n\t" 4914 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4915 "vaddsd $dst,$dst,$tmp2\n\t" 4916 "pshufd $tmp,$tmp2,0xE\n\t" 4917 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 4918 ins_encode %{ 4919 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4920 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4921 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4922 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4923 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4924 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4925 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4926 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4927 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4928 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4929 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4930 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4931 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4932 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4933 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4934 %} 4935 ins_pipe( pipe_slow ); 4936 %} 4937 4938 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4939 predicate(UseSSE > 3 && UseAVX == 0); 4940 match(Set dst (MulReductionVI src1 src2)); 4941 effect(TEMP tmp, TEMP tmp2); 4942 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4943 "pmulld $tmp2,$src2\n\t" 4944 "movd $tmp,$src1\n\t" 4945 "pmulld $tmp2,$tmp\n\t" 4946 "movd $dst,$tmp2\t! 
mul reduction2I" %} 4947 ins_encode %{ 4948 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4949 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4950 __ movdl($tmp$$XMMRegister, $src1$$Register); 4951 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4952 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4953 %} 4954 ins_pipe( pipe_slow ); 4955 %} 4956 4957 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4958 predicate(UseAVX > 0); 4959 match(Set dst (MulReductionVI src1 src2)); 4960 effect(TEMP tmp, TEMP tmp2); 4961 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4962 "vpmulld $tmp,$src2,$tmp2\n\t" 4963 "movd $tmp2,$src1\n\t" 4964 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4965 "movd $dst,$tmp2\t! mul reduction2I" %} 4966 ins_encode %{ 4967 int vector_len = 0; 4968 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4969 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4970 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4971 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4972 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4973 %} 4974 ins_pipe( pipe_slow ); 4975 %} 4976 4977 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4978 predicate(UseSSE > 3 && UseAVX == 0); 4979 match(Set dst (MulReductionVI src1 src2)); 4980 effect(TEMP tmp, TEMP tmp2); 4981 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4982 "pmulld $tmp2,$src2\n\t" 4983 "pshufd $tmp,$tmp2,0x1\n\t" 4984 "pmulld $tmp2,$tmp\n\t" 4985 "movd $tmp,$src1\n\t" 4986 "pmulld $tmp2,$tmp\n\t" 4987 "movd $dst,$tmp2\t! mul reduction4I" %} 4988 ins_encode %{ 4989 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4990 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4991 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 4992 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4993 __ movdl($tmp$$XMMRegister, $src1$$Register); 4994 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4995 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4996 %} 4997 ins_pipe( pipe_slow ); 4998 %} 4999 5000 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5001 predicate(UseAVX > 0); 5002 match(Set dst (MulReductionVI src1 src2)); 5003 effect(TEMP tmp, TEMP tmp2); 5004 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5005 "vpmulld $tmp,$src2,$tmp2\n\t" 5006 "pshufd $tmp2,$tmp,0x1\n\t" 5007 "vpmulld $tmp,$tmp,$tmp2\n\t" 5008 "movd $tmp2,$src1\n\t" 5009 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5010 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5011 ins_encode %{ 5012 int vector_len = 0; 5013 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5014 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5015 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5016 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5017 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5018 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5019 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5020 %} 5021 ins_pipe( pipe_slow ); 5022 %} 5023 5024 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5025 predicate(UseAVX > 0); 5026 match(Set dst (MulReductionVI src1 src2)); 5027 effect(TEMP tmp, TEMP tmp2); 5028 format %{ "vextracti128_high $tmp,$src2\n\t" 5029 "vpmulld $tmp,$tmp,$src2\n\t" 5030 "pshufd $tmp2,$tmp,0xE\n\t" 5031 "vpmulld $tmp,$tmp,$tmp2\n\t" 5032 "pshufd $tmp2,$tmp,0x1\n\t" 5033 "vpmulld $tmp,$tmp,$tmp2\n\t" 5034 "movd $tmp2,$src1\n\t" 5035 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5036 "movd $dst,$tmp2\t! mul reduction8I" %} 5037 ins_encode %{ 5038 int vector_len = 0; 5039 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5040 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5041 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5042 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5043 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5044 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5045 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5046 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5047 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5048 %} 5049 ins_pipe( pipe_slow ); 5050 %} 5051 5052 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5053 predicate(UseAVX > 2); 5054 match(Set dst (MulReductionVI src1 src2)); 5055 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5056 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5057 "vpmulld $tmp3,$tmp3,$src2\n\t" 5058 "vextracti128_high $tmp,$tmp3\n\t" 5059 "vpmulld $tmp,$tmp,$src2\n\t" 5060 "pshufd $tmp2,$tmp,0xE\n\t" 5061 "vpmulld $tmp,$tmp,$tmp2\n\t" 5062 "pshufd $tmp2,$tmp,0x1\n\t" 5063 "vpmulld $tmp,$tmp,$tmp2\n\t" 5064 "movd $tmp2,$src1\n\t" 5065 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5066 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5067 ins_encode %{ 5068 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5069 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5070 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5071 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5072 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5073 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5074 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5075 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5076 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5077 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5078 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5079 %} 5080 ins_pipe( pipe_slow ); 5081 %} 5082 5083 #ifdef _LP64 5084 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5085 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5086 match(Set dst (MulReductionVL src1 src2)); 5087 effect(TEMP tmp, TEMP tmp2); 5088 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5089 "vpmullq $tmp,$src2,$tmp2\n\t" 5090 "movdq $tmp2,$src1\n\t" 5091 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5092 "movdq $dst,$tmp2\t! mul reduction2L" %} 5093 ins_encode %{ 5094 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5095 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5096 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5097 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5098 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5099 %} 5100 ins_pipe( pipe_slow ); 5101 %} 5102 5103 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5104 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5105 match(Set dst (MulReductionVL src1 src2)); 5106 effect(TEMP tmp, TEMP tmp2); 5107 format %{ "vextracti128_high $tmp,$src2\n\t" 5108 "vpmullq $tmp2,$tmp,$src2\n\t" 5109 "pshufd $tmp,$tmp2,0xE\n\t" 5110 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5111 "movdq $tmp,$src1\n\t" 5112 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5113 "movdq $dst,$tmp2\t! mul reduction4L" %} 5114 ins_encode %{ 5115 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5116 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5117 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5118 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5119 __ movdq($tmp$$XMMRegister, $src1$$Register); 5120 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5121 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5122 %} 5123 ins_pipe( pipe_slow ); 5124 %} 5125 5126 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5127 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5128 match(Set dst (MulReductionVL src1 src2)); 5129 effect(TEMP tmp, TEMP tmp2); 5130 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5131 "vpmullq $tmp2,$tmp2,$src2\n\t" 5132 "vextracti128_high $tmp,$tmp2\n\t" 5133 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5134 "pshufd $tmp,$tmp2,0xE\n\t" 5135 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5136 "movdq $tmp,$src1\n\t" 5137 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5138 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5139 ins_encode %{ 5140 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5141 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5142 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5143 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5144 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5145 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5146 __ movdq($tmp$$XMMRegister, $src1$$Register); 5147 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5148 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5149 %} 5150 ins_pipe( pipe_slow ); 5151 %} 5152 #endif 5153 5154 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5155 predicate(UseSSE >= 1 && UseAVX == 0); 5156 match(Set dst (MulReductionVF dst src2)); 5157 effect(TEMP dst, TEMP tmp); 5158 format %{ "mulss $dst,$src2\n\t" 5159 "pshufd $tmp,$src2,0x01\n\t" 5160 "mulss $dst,$tmp\t! mul reduction2F" %} 5161 ins_encode %{ 5162 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5163 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5164 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5165 %} 5166 ins_pipe( pipe_slow ); 5167 %} 5168 5169 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5170 predicate(UseAVX > 0); 5171 match(Set dst (MulReductionVF dst src2)); 5172 effect(TEMP tmp, TEMP dst); 5173 format %{ "vmulss $dst,$dst,$src2\n\t" 5174 "pshufd $tmp,$src2,0x01\n\t" 5175 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5176 ins_encode %{ 5177 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5178 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5179 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5180 %} 5181 ins_pipe( pipe_slow ); 5182 %} 5183 5184 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5185 predicate(UseSSE >= 1 && UseAVX == 0); 5186 match(Set dst (MulReductionVF dst src2)); 5187 effect(TEMP dst, TEMP tmp); 5188 format %{ "mulss $dst,$src2\n\t" 5189 "pshufd $tmp,$src2,0x01\n\t" 5190 "mulss $dst,$tmp\n\t" 5191 "pshufd $tmp,$src2,0x02\n\t" 5192 "mulss $dst,$tmp\n\t" 5193 "pshufd $tmp,$src2,0x03\n\t" 5194 "mulss $dst,$tmp\t! mul reduction4F" %} 5195 ins_encode %{ 5196 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5197 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5198 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5199 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5200 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5201 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5202 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5203 %} 5204 ins_pipe( pipe_slow ); 5205 %} 5206 5207 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5208 predicate(UseAVX > 0); 5209 match(Set dst (MulReductionVF dst src2)); 5210 effect(TEMP tmp, TEMP dst); 5211 format %{ "vmulss $dst,$dst,$src2\n\t" 5212 "pshufd $tmp,$src2,0x01\n\t" 5213 "vmulss $dst,$dst,$tmp\n\t" 5214 "pshufd $tmp,$src2,0x02\n\t" 5215 "vmulss $dst,$dst,$tmp\n\t" 5216 "pshufd $tmp,$src2,0x03\n\t" 5217 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5218 ins_encode %{ 5219 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5220 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5221 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5222 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5223 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5224 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5225 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5226 %} 5227 ins_pipe( pipe_slow ); 5228 %} 5229 5230 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5231 predicate(UseAVX > 0); 5232 match(Set dst (MulReductionVF dst src2)); 5233 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5234 format %{ "vmulss $dst,$dst,$src2\n\t" 5235 "pshufd $tmp,$src2,0x01\n\t" 5236 "vmulss $dst,$dst,$tmp\n\t" 5237 "pshufd $tmp,$src2,0x02\n\t" 5238 "vmulss $dst,$dst,$tmp\n\t" 5239 "pshufd $tmp,$src2,0x03\n\t" 5240 "vmulss $dst,$dst,$tmp\n\t" 5241 "vextractf128_high $tmp2,$src2\n\t" 5242 "vmulss $dst,$dst,$tmp2\n\t" 5243 "pshufd $tmp,$tmp2,0x01\n\t" 5244 "vmulss $dst,$dst,$tmp\n\t" 5245 "pshufd $tmp,$tmp2,0x02\n\t" 5246 "vmulss $dst,$dst,$tmp\n\t" 5247 "pshufd $tmp,$tmp2,0x03\n\t" 5248 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5249 ins_encode %{ 5250 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5251 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5252 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5253 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5254 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5255 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5256 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5257 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5258 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5259 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5260 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5261 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5262 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5263 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5264 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5265 %} 5266 ins_pipe( pipe_slow ); 5267 %} 5268 5269 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5270 predicate(UseAVX > 2); 5271 match(Set dst (MulReductionVF dst src2)); 5272 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5273 format %{ "vmulss $dst,$dst,$src2\n\t" 5274 "pshufd $tmp,$src2,0x01\n\t" 5275 "vmulss $dst,$dst,$tmp\n\t" 5276 "pshufd $tmp,$src2,0x02\n\t" 5277 "vmulss $dst,$dst,$tmp\n\t" 5278 "pshufd $tmp,$src2,0x03\n\t" 5279 "vmulss $dst,$dst,$tmp\n\t" 5280 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5281 "vmulss $dst,$dst,$tmp2\n\t" 5282 "pshufd $tmp,$tmp2,0x01\n\t" 5283 "vmulss $dst,$dst,$tmp\n\t" 5284 "pshufd $tmp,$tmp2,0x02\n\t" 5285 "vmulss $dst,$dst,$tmp\n\t" 5286 "pshufd $tmp,$tmp2,0x03\n\t" 5287 "vmulss $dst,$dst,$tmp\n\t" 5288 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5289 "vmulss $dst,$dst,$tmp2\n\t" 5290 "pshufd $tmp,$tmp2,0x01\n\t" 5291 "vmulss $dst,$dst,$tmp\n\t" 5292 "pshufd $tmp,$tmp2,0x02\n\t" 5293 "vmulss $dst,$dst,$tmp\n\t" 5294 "pshufd $tmp,$tmp2,0x03\n\t" 5295 "vmulss $dst,$dst,$tmp\n\t" 5296 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5297 "vmulss $dst,$dst,$tmp2\n\t" 5298 "pshufd $tmp,$tmp2,0x01\n\t" 5299 "vmulss $dst,$dst,$tmp\n\t" 5300 "pshufd 
$tmp,$tmp2,0x02\n\t" 5301 "vmulss $dst,$dst,$tmp\n\t" 5302 "pshufd $tmp,$tmp2,0x03\n\t" 5303 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5304 ins_encode %{ 5305 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5306 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5307 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5308 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5309 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5310 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5311 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5312 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5313 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5314 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5315 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5316 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5317 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5318 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5319 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5320 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5321 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5322 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5323 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5324 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5325 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5326 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5327 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5328 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5329 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5330 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5331 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5332 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5333 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5334 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5335 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5336 %} 5337 ins_pipe( pipe_slow ); 5338 %} 5339 5340 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5341 predicate(UseSSE >= 1 && UseAVX == 0); 5342 match(Set dst (MulReductionVD dst src2)); 5343 effect(TEMP dst, TEMP tmp); 5344 format %{ "mulsd $dst,$src2\n\t" 5345 "pshufd $tmp,$src2,0xE\n\t" 5346 "mulsd $dst,$tmp\t! mul reduction2D" %} 5347 ins_encode %{ 5348 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5349 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5350 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5351 %} 5352 ins_pipe( pipe_slow ); 5353 %} 5354 5355 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5356 predicate(UseAVX > 0); 5357 match(Set dst (MulReductionVD dst src2)); 5358 effect(TEMP tmp, TEMP dst); 5359 format %{ "vmulsd $dst,$dst,$src2\n\t" 5360 "pshufd $tmp,$src2,0xE\n\t" 5361 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %} 5362 ins_encode %{ 5363 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5364 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5365 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5366 %} 5367 ins_pipe( pipe_slow ); 5368 %} 5369 5370 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5371 predicate(UseAVX > 0); 5372 match(Set dst (MulReductionVD dst src2)); 5373 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5374 format %{ "vmulsd $dst,$dst,$src2\n\t" 5375 "pshufd $tmp,$src2,0xE\n\t" 5376 "vmulsd $dst,$dst,$tmp\n\t" 5377 "vextractf128_high $tmp2,$src2\n\t" 5378 "vmulsd $dst,$dst,$tmp2\n\t" 5379 "pshufd $tmp,$tmp2,0xE\n\t" 5380 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5381 ins_encode %{ 5382 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5383 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5384 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5385 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5386 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5387 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5388 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5389 %} 5390 ins_pipe( pipe_slow ); 5391 %} 5392 5393 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5394 predicate(UseAVX > 2); 5395 match(Set dst (MulReductionVD dst src2)); 5396 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5397 format %{ "vmulsd $dst,$dst,$src2\n\t" 5398 "pshufd $tmp,$src2,0xE\n\t" 5399 "vmulsd $dst,$dst,$tmp\n\t" 5400 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5401 "vmulsd $dst,$dst,$tmp2\n\t" 5402 "pshufd $tmp,$src2,0xE\n\t" 5403 "vmulsd $dst,$dst,$tmp\n\t" 5404 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5405 "vmulsd $dst,$dst,$tmp2\n\t" 5406 "pshufd $tmp,$tmp2,0xE\n\t" 5407 "vmulsd $dst,$dst,$tmp\n\t" 5408 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5409 "vmulsd $dst,$dst,$tmp2\n\t" 5410 "pshufd $tmp,$tmp2,0xE\n\t" 5411 "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} 5412 ins_encode %{ 5413 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5414 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5415 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5416 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5417 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5418 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5419 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5420 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5421 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5422 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5423 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5424 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5425 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5426 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5427 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5428 %} 5429 ins_pipe( pipe_slow ); 5430 %} 5431 5432 // ====================VECTOR ARITHMETIC======================================= 5433 5434 // --------------------------------- ADD -------------------------------------- 5435 5436 // Bytes vector add 5437 instruct vadd4B(vecS dst, vecS src) %{ 5438 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5439 match(Set dst (AddVB dst src)); 5440 format %{ "paddb $dst,$src\t! 
add packed4B" %} 5441 ins_encode %{ 5442 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5443 %} 5444 ins_pipe( pipe_slow ); 5445 %} 5446 5447 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5448 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5449 match(Set dst (AddVB src1 src2)); 5450 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5451 ins_encode %{ 5452 int vector_len = 0; 5453 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5454 %} 5455 ins_pipe( pipe_slow ); 5456 %} 5457 5458 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5459 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5460 match(Set dst (AddVB src1 src2)); 5461 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5462 ins_encode %{ 5463 int vector_len = 0; 5464 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5465 %} 5466 ins_pipe( pipe_slow ); 5467 %} 5468 5469 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5470 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5471 match(Set dst (AddVB dst src2)); 5472 effect(TEMP src1); 5473 format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} 5474 ins_encode %{ 5475 int vector_len = 0; 5476 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5477 %} 5478 ins_pipe( pipe_slow ); 5479 %} 5480 5481 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ 5482 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5483 match(Set dst (AddVB src (LoadVector mem))); 5484 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5485 ins_encode %{ 5486 int vector_len = 0; 5487 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5488 %} 5489 ins_pipe( pipe_slow ); 5490 %} 5491 5492 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ 5493 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5494 match(Set dst (AddVB src (LoadVector mem))); 5495 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5496 ins_encode %{ 5497 int vector_len = 0; 5498 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5499 %} 5500 ins_pipe( pipe_slow ); 5501 %} 5502 5503 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5504 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5505 match(Set dst (AddVB dst (LoadVector mem))); 5506 effect(TEMP src); 5507 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5508 ins_encode %{ 5509 int vector_len = 0; 5510 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5511 %} 5512 ins_pipe( pipe_slow ); 5513 %} 5514 5515 instruct vadd8B(vecD dst, vecD src) %{ 5516 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 5517 match(Set dst (AddVB dst src)); 5518 format %{ "paddb $dst,$src\t! add packed8B" %} 5519 ins_encode %{ 5520 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5521 %} 5522 ins_pipe( pipe_slow ); 5523 %} 5524 5525 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5526 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5527 match(Set dst (AddVB src1 src2)); 5528 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %} 5529 ins_encode %{ 5530 int vector_len = 0; 5531 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5532 %} 5533 ins_pipe( pipe_slow ); 5534 %} 5535 5536 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5537 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5538 match(Set dst (AddVB src1 src2)); 5539 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5540 ins_encode %{ 5541 int vector_len = 0; 5542 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5543 %} 5544 ins_pipe( pipe_slow ); 5545 %} 5546 5547 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5548 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5549 match(Set dst (AddVB dst src2)); 5550 effect(TEMP src1); 5551 format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} 5552 ins_encode %{ 5553 int vector_len = 0; 5554 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5555 %} 5556 ins_pipe( pipe_slow ); 5557 %} 5558 5559 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ 5560 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5561 match(Set dst (AddVB src (LoadVector mem))); 5562 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5563 ins_encode %{ 5564 int vector_len = 0; 5565 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5566 %} 5567 ins_pipe( pipe_slow ); 5568 %} 5569 5570 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ 5571 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5572 match(Set dst (AddVB src (LoadVector mem))); 5573 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5574 ins_encode %{ 5575 int vector_len = 0; 5576 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5577 %} 5578 ins_pipe( pipe_slow ); 5579 %} 5580 5581 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 5582 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5583 match(Set dst (AddVB dst (LoadVector mem))); 5584 effect(TEMP src); 5585 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5586 ins_encode %{ 5587 int vector_len = 0; 5588 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5589 %} 5590 ins_pipe( pipe_slow ); 5591 %} 5592 5593 instruct vadd16B(vecX dst, vecX src) %{ 5594 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 5595 match(Set dst (AddVB dst src)); 5596 format %{ "paddb $dst,$src\t! add packed16B" %} 5597 ins_encode %{ 5598 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5599 %} 5600 ins_pipe( pipe_slow ); 5601 %} 5602 5603 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 5604 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5605 match(Set dst (AddVB src1 src2)); 5606 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5607 ins_encode %{ 5608 int vector_len = 0; 5609 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5610 %} 5611 ins_pipe( pipe_slow ); 5612 %} 5613 5614 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 5615 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5616 match(Set dst (AddVB src1 src2)); 5617 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed16B" %} 5618 ins_encode %{ 5619 int vector_len = 0; 5620 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5621 %} 5622 ins_pipe( pipe_slow ); 5623 %} 5624 5625 instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 5626 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 5627 match(Set dst (AddVB dst src2)); 5628 effect(TEMP src1); 5629 format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %} 5630 ins_encode %{ 5631 int vector_len = 0; 5632 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5633 %} 5634 ins_pipe( pipe_slow ); 5635 %} 5636 5637 instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ 5638 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5639 match(Set dst (AddVB src (LoadVector mem))); 5640 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5641 ins_encode %{ 5642 int vector_len = 0; 5643 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5644 %} 5645 ins_pipe( pipe_slow ); 5646 %} 5647 5648 instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ 5649 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5650 match(Set dst (AddVB src (LoadVector mem))); 5651 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5652 ins_encode %{ 5653 int vector_len = 0; 5654 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5655 %} 5656 ins_pipe( pipe_slow ); 5657 %} 5658 5659 instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 5660 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5661 match(Set dst (AddVB dst (LoadVector mem))); 5662 effect(TEMP src); 5663 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 5664 ins_encode %{ 5665 int vector_len = 0; 5666 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5667 %} 5668 ins_pipe( pipe_slow ); 5669 %} 5670 5671 instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 5672 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5673 match(Set dst (AddVB src1 src2)); 5674 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5675 ins_encode %{ 5676 int vector_len = 1; 5677 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5678 %} 5679 ins_pipe( pipe_slow ); 5680 %} 5681 5682 instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 5683 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5684 match(Set dst (AddVB src1 src2)); 5685 format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %} 5686 ins_encode %{ 5687 int vector_len = 1; 5688 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5689 %} 5690 ins_pipe( pipe_slow ); 5691 %} 5692 5693 instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 5694 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 5695 match(Set dst (AddVB dst src2)); 5696 effect(TEMP src1); 5697 format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %} 5698 ins_encode %{ 5699 int vector_len = 1; 5700 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5701 %} 5702 ins_pipe( pipe_slow ); 5703 %} 5704 5705 instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{ 5706 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 5707 match(Set dst (AddVB src (LoadVector mem))); 5708 format %{ "vpaddb $dst,$src,$mem\t! 
add packed32B" %} 5709 ins_encode %{ 5710 int vector_len = 1; 5711 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5712 %} 5713 ins_pipe( pipe_slow ); 5714 %} 5715 5716 instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{ 5717 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5718 match(Set dst (AddVB src (LoadVector mem))); 5719 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5720 ins_encode %{ 5721 int vector_len = 1; 5722 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5723 %} 5724 ins_pipe( pipe_slow ); 5725 %} 5726 5727 instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 5728 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 5729 match(Set dst (AddVB dst (LoadVector mem))); 5730 effect(TEMP src); 5731 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 5732 ins_encode %{ 5733 int vector_len = 1; 5734 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5735 %} 5736 ins_pipe( pipe_slow ); 5737 %} 5738 5739 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 5740 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 5741 match(Set dst (AddVB src1 src2)); 5742 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 5743 ins_encode %{ 5744 int vector_len = 2; 5745 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5746 %} 5747 ins_pipe( pipe_slow ); 5748 %} 5749 5750 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 5751 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 5752 match(Set dst (AddVB src (LoadVector mem))); 5753 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 5754 ins_encode %{ 5755 int vector_len = 2; 5756 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5757 %} 5758 ins_pipe( pipe_slow ); 5759 %} 5760 5761 // Shorts/Chars vector add 5762 instruct vadd2S(vecS dst, vecS src) %{ 5763 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 5764 match(Set dst (AddVS dst src)); 5765 format %{ "paddw $dst,$src\t! add packed2S" %} 5766 ins_encode %{ 5767 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5768 %} 5769 ins_pipe( pipe_slow ); 5770 %} 5771 5772 instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5773 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5774 match(Set dst (AddVS src1 src2)); 5775 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5776 ins_encode %{ 5777 int vector_len = 0; 5778 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5779 %} 5780 ins_pipe( pipe_slow ); 5781 %} 5782 5783 instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5784 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5785 match(Set dst (AddVS src1 src2)); 5786 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 5787 ins_encode %{ 5788 int vector_len = 0; 5789 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5790 %} 5791 ins_pipe( pipe_slow ); 5792 %} 5793 5794 instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5795 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 5796 match(Set dst (AddVS dst src2)); 5797 effect(TEMP src1); 5798 format %{ "vpaddw $dst,$dst,$src2\t! 
add packed2S" %} 5799 ins_encode %{ 5800 int vector_len = 0; 5801 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5802 %} 5803 ins_pipe( pipe_slow ); 5804 %} 5805 5806 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ 5807 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 5808 match(Set dst (AddVS src (LoadVector mem))); 5809 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5810 ins_encode %{ 5811 int vector_len = 0; 5812 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5813 %} 5814 ins_pipe( pipe_slow ); 5815 %} 5816 5817 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ 5818 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5819 match(Set dst (AddVS src (LoadVector mem))); 5820 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5821 ins_encode %{ 5822 int vector_len = 0; 5823 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5824 %} 5825 ins_pipe( pipe_slow ); 5826 %} 5827 5828 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5829 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 5830 match(Set dst (AddVS dst (LoadVector mem))); 5831 effect(TEMP src); 5832 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 5833 ins_encode %{ 5834 int vector_len = 0; 5835 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5836 %} 5837 ins_pipe( pipe_slow ); 5838 %} 5839 5840 instruct vadd4S(vecD dst, vecD src) %{ 5841 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5842 match(Set dst (AddVS dst src)); 5843 format %{ "paddw $dst,$src\t! add packed4S" %} 5844 ins_encode %{ 5845 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5846 %} 5847 ins_pipe( pipe_slow ); 5848 %} 5849 5850 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5851 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5852 match(Set dst (AddVS src1 src2)); 5853 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5854 ins_encode %{ 5855 int vector_len = 0; 5856 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5857 %} 5858 ins_pipe( pipe_slow ); 5859 %} 5860 5861 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5862 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5863 match(Set dst (AddVS src1 src2)); 5864 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 5865 ins_encode %{ 5866 int vector_len = 0; 5867 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5868 %} 5869 ins_pipe( pipe_slow ); 5870 %} 5871 5872 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5873 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5874 match(Set dst (AddVS dst src2)); 5875 effect(TEMP src1); 5876 format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} 5877 ins_encode %{ 5878 int vector_len = 0; 5879 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5880 %} 5881 ins_pipe( pipe_slow ); 5882 %} 5883 5884 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ 5885 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5886 match(Set dst (AddVS src (LoadVector mem))); 5887 format %{ "vpaddw $dst,$src,$mem\t! 
instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
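// The byte and short/char rules above need the avx / evex / evex_special
// triad because 512-bit byte and word operations require AVX512BW. The
// int, long, float and double rules below key off plain UseAVX levels
// instead: 256-bit integer forms need AVX2 (UseAVX > 1) and 512-bit forms
// need AVX-512F (UseAVX > 2), while 256-bit floating-point add/sub was
// already available with AVX1 (UseAVX > 0).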
// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
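// The sub rules mirror the add triads above. Since subtraction is not
// commutative, only the second input (the subtrahend) can be folded from
// memory in the *_mem forms below.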
// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
sub packed8D" %} 7475 ins_encode %{ 7476 int vector_len = 2; 7477 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7478 %} 7479 ins_pipe( pipe_slow ); 7480 %} 7481 7482 // --------------------------------- MUL -------------------------------------- 7483 7484 // Shorts/Chars vector mul 7485 instruct vmul2S(vecS dst, vecS src) %{ 7486 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7487 match(Set dst (MulVS dst src)); 7488 format %{ "pmullw $dst,$src\t! mul packed2S" %} 7489 ins_encode %{ 7490 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 7491 %} 7492 ins_pipe( pipe_slow ); 7493 %} 7494 7495 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7496 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7497 match(Set dst (MulVS src1 src2)); 7498 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7499 ins_encode %{ 7500 int vector_len = 0; 7501 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7502 %} 7503 ins_pipe( pipe_slow ); 7504 %} 7505 7506 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7507 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7508 match(Set dst (MulVS src1 src2)); 7509 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7510 ins_encode %{ 7511 int vector_len = 0; 7512 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7513 %} 7514 ins_pipe( pipe_slow ); 7515 %} 7516 7517 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 7518 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7519 match(Set dst (MulVS dst src2)); 7520 effect(TEMP src1); 7521 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 7522 ins_encode %{ 7523 int vector_len = 0; 7524 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7525 %} 7526 ins_pipe( pipe_slow ); 7527 %} 7528 7529 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7530 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7531 match(Set dst (MulVS src (LoadVector mem))); 7532 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7533 ins_encode %{ 7534 int vector_len = 0; 7535 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7536 %} 7537 ins_pipe( pipe_slow ); 7538 %} 7539 7540 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7541 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7542 match(Set dst (MulVS src (LoadVector mem))); 7543 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7544 ins_encode %{ 7545 int vector_len = 0; 7546 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7547 %} 7548 ins_pipe( pipe_slow ); 7549 %} 7550 7551 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 7552 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7553 match(Set dst (MulVS dst (LoadVector mem))); 7554 effect(TEMP src); 7555 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 7556 ins_encode %{ 7557 int vector_len = 0; 7558 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7559 %} 7560 ins_pipe( pipe_slow ); 7561 %} 7562 7563 instruct vmul4S(vecD dst, vecD src) %{ 7564 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7565 match(Set dst (MulVS dst src)); 7566 format %{ "pmullw $dst,$src\t! 
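
// Note on the *_evex_special variants above: on AVX-512 targets without
// AVX512BW (VM_Version::supports_avx512nobw()), these rules match
// (MulVS dst src2) with src1 named only as a TEMP, so the destination
// doubles as the first source operand; the plain *_evex forms instead
// require VM_Version::supports_avx512bw().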

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
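
// Longs vector mul. VPMULLQ is an AVX-512DQ instruction, so each MulVL
// rule below additionally tests VM_Version::supports_avx512dq().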

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
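
// Note on vcmov4D_reg above: cmppd leaves an all-ones/all-zeros mask per
// lane in $dst, and blendvpd then selects each lane from $src2 where the
// mask sign bit is set and from $src1 otherwise, which realizes the
// conditional move.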

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
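
// A minimal illustration of the shared-count point above (assembly shown
// for explanation only, with assumed operand values): the SSE/AVX shift
// instructions read the entire low 64 bits of an XMM count operand, so a
// single movdl serves every element size:
//   movdl xmm1, ecx      // xmm1[63:0] = count
//   psllw xmm0, xmm1     // each word  in xmm0 <<= count
//   pslld xmm0, xmm1     // each dword in xmm0 <<= count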

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts short values into ints
// with sign extension before a shift. But char vectors are fine, since
// chars are unsigned values.
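
// Worked example of the mismatch described above (Java semantics, shown
// for illustration only): with short s = -1, Java evaluates s >>> 1 as
// ((int)s) >>> 1 = 0x7FFFFFFF, whose low 16 bits are 0xFFFF, so the
// narrowed short result is still -1; a 16-bit psrlw of 0xFFFF by 1 would
// instead produce 0x7FFF = 32767. Chars are zero-extended, so both routes
// agree for char data.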
left shift packed8L" %} 9023 ins_encode %{ 9024 int vector_len = 2; 9025 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9026 %} 9027 ins_pipe( pipe_slow ); 9028 %} 9029 9030 // ----------------------- LogicalRightShift ----------------------------------- 9031 9032 // Shorts vector logical right shift produces incorrect Java result 9033 // for negative data because java code convert short value into int with 9034 // sign extension before a shift. But char vectors are fine since chars are 9035 // unsigned values. 9036 9037 instruct vsrl2S(vecS dst, vecS shift) %{ 9038 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9039 match(Set dst (URShiftVS dst shift)); 9040 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 9041 ins_encode %{ 9042 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9043 %} 9044 ins_pipe( pipe_slow ); 9045 %} 9046 9047 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 9048 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9049 match(Set dst (URShiftVS dst shift)); 9050 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 9051 ins_encode %{ 9052 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9053 %} 9054 ins_pipe( pipe_slow ); 9055 %} 9056 9057 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 9058 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9059 match(Set dst (URShiftVS src shift)); 9060 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9061 ins_encode %{ 9062 int vector_len = 0; 9063 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9064 %} 9065 ins_pipe( pipe_slow ); 9066 %} 9067 9068 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 9069 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9070 match(Set dst (URShiftVS src shift)); 9071 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9072 ins_encode %{ 9073 int vector_len = 0; 9074 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9075 %} 9076 ins_pipe( pipe_slow ); 9077 %} 9078 9079 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 9080 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9081 match(Set dst (URShiftVS dst shift)); 9082 effect(TEMP src); 9083 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9084 ins_encode %{ 9085 int vector_len = 0; 9086 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9087 %} 9088 ins_pipe( pipe_slow ); 9089 %} 9090 9091 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 9092 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9093 match(Set dst (URShiftVS src shift)); 9094 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9095 ins_encode %{ 9096 int vector_len = 0; 9097 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9098 %} 9099 ins_pipe( pipe_slow ); 9100 %} 9101 9102 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9103 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9104 match(Set dst (URShiftVS src shift)); 9105 format %{ "vpsrlw $dst,$src,$shift\t! 
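//
// A worked example: with short s = (short)0xFFFC (-4) and a shift of 2,
//   Java scalar: (short)(s >>> 2) promotes s to int 0xFFFFFFFC, shifts it
//                to 0x3FFFFFFF, and the narrowing cast keeps the low
//                16 bits, giving 0xFFFF (-1);
//   psrlw lane:  0xFFFC >>> 2 = 0x3FFF (16383).
// The two results differ, so the URShiftVS patterns below are only safe
// for char (unsigned) data, where promotion zero-extends and both routes
// agree.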
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
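// (Arithmetic right shift of shorts is correct to vectorize, by contrast
// with the logical case above: promotion to int sign-extends, the shift
// preserves that sign, and the narrowing cast keeps the same low 16 bits
// that psraw/vpsraw compute.)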
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.
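// (SSE/AVX provide psraw and psrad but no 64-bit form; an arithmetic
// right shift of quadword lanes (vpsraq) only appears with AVX-512, which
// this file does not use for RShiftVL. Emulating it would take a logical
// shift plus a sign-propagating fix-up, so RShiftVL is left to scalar
// code.)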

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  // Predicate fixed from length_in_bytes() == 4 (a copy-paste slip from
  // vor4B_mem): this pattern handles the 8-byte case.
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
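// The FmaVD/FmaVF nodes matched below are produced when C2's SuperWord
// pass vectorizes scalar Math.fma calls in a counted loop (guarded by
// UseFMA). A sketch of the Java shape that ends up here:
//
//   for (int i = 0; i < n; i++) {
//     c[i] = Math.fma(a[i], b[i], c[i]);  // one vfmadd per vector chunk
//   }
//
// Each pattern computes $c = $a * $b + $c, with $c serving as both the
// accumulator input and the result register, which is why $c appears on
// both sides of the match rule.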

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}