//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// The Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
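//
// For example, the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares XMM0 as save-on-call under both the VM and the C calling
// convention, spilled as a float (Op_RegF), with hardware encoding 0; the
// trailing expression supplies the VMReg the rest of the VM uses to refer
// to it.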

// XMM registers.  512-bit registers, 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
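
// A reg_class_dynamic selects between its two underlying register classes
// at JIT time: float_reg above resolves to float_reg_evex when the runtime
// predicate VM_Version::supports_evex() holds, and to float_reg_legacy
// otherwise. The same pattern is repeated for the double and vector
// classes below.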

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

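// NativeJump (declared in nativeInst_x86.hpp) is forward-declared here so
// that size_exception_handler() below can return NativeJump::instruction_size.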
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.
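  // (How this works: the 5-byte call below pushes its own return address,
  // i.e. the address of "next", and the subptr then rewinds that stack slot
  // by the distance from "the_pc" to "next", so the slot ends up holding
  // "the_pc" itself without clobbering any register.)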
  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX: vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value; // By default, match rules are supported.
}
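
// For example (illustrative): on a machine running with UseSSE=3 and
// UseAVX=0, the Op_MulVI case above returns false, so C2 keeps the
// corresponding multiplies scalar instead of vectorizing them.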

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value; // By default, match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // EVEX (AVX-512) supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}
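
// Worked example (illustrative): with UseAVX == 2 and MaxVectorSize >= 32,
// vector_width_in_bytes(T_INT) == (1 << 2) * 8 == 32, so
// max_vector_size(T_INT) == 32 / 4 == 8 ints per vector and
// vector_ideal_reg(32) selects Op_VecY (a 256-bit ymm register);
// min_vector_size(T_BYTE) == 4, matching the 4-byte minimum above.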

// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector loads/stores.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypasses
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}
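
// Example of what the cloning above enables (illustrative): for an int-array
// access a[i], the address tree base + 16 + (i << 2) is cloned into its uses
// rather than computed into a register, because the shift constant 2 is <= 3
// and the whole expression folds into a single x86 operand of the form
// [base + index*4 + 16].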

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM the size calculation is very complex, so instructions
  // are emitted into a scratch buffer to determine their size.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM the size calculation is very complex, so instructions
  // are emitted into a scratch buffer to determine their size.
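  // Depending on the ideal register kind, the spill uses movd (VecS),
  // movq (VecD), movdqu (VecX), vmovdqu (VecY) or evmovdquq (VecZ) against
  // an rsp-relative stack slot; the return value is the instruction size in
  // bytes (measured when code is actually emitted, otherwise computed from
  // the encoding).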
1551 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1552 if (cbuf) { 1553 MacroAssembler _masm(cbuf); 1554 int offset = __ offset(); 1555 if (is_load) { 1556 switch (ireg) { 1557 case Op_VecS: 1558 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1559 break; 1560 case Op_VecD: 1561 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1562 break; 1563 case Op_VecX: 1564 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1565 break; 1566 case Op_VecY: 1567 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1568 break; 1569 case Op_VecZ: 1570 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1571 break; 1572 default: 1573 ShouldNotReachHere(); 1574 } 1575 } else { // store 1576 switch (ireg) { 1577 case Op_VecS: 1578 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1579 break; 1580 case Op_VecD: 1581 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1582 break; 1583 case Op_VecX: 1584 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1585 break; 1586 case Op_VecY: 1587 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1588 break; 1589 case Op_VecZ: 1590 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1591 break; 1592 default: 1593 ShouldNotReachHere(); 1594 } 1595 } 1596 int size = __ offset() - offset; 1597 #ifdef ASSERT 1598 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1599 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
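    // (The expected base size of 5 bytes breaks down as 2 prefix/escape
    // bytes, 1 opcode byte, 1 ModRM byte, and 1 SIB byte, which an
    // rsp-relative address always needs; offset_size covers the 1- or
    // 4-byte displacement.)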
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    // EVEX can often compress a 4-byte displacement into one disp8*N byte;
    // check whether that applies to this tuple type and operand size.
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1; // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
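  // e.g. (illustrative) replicate8_imm(0x1f, 1) widens as
  //   0x1f -> 0x1f1f -> 0x1f1f1f1f -> 0x1f1f1f1f1f1f1f1f,
  // and replicate8_imm(-1, 2) first masks to 0xffff, then widens to all ones.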
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// This one generically applies only for evex, so only one version
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without the next two lines with error:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
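    // 0x20 and 0x21 fall outside vcmppd's 5-bit predicate range (0x00-0x1F)
    // and exist only to satisfy the ADLC; the predicate above already
    // excludes the overflow and no_overflow tests.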
1795 overflow (0x20, "o"); // not really supported by the instruction 1796 no_overflow (0x21, "no"); // not really supported by the instruction 1797 %} 1798 %} 1799 1800 1801 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1802 1803 // ============================================================================ 1804 1805 instruct ShouldNotReachHere() %{ 1806 match(Halt); 1807 format %{ "ud2\t# ShouldNotReachHere" %} 1808 ins_encode %{ 1809 __ ud2(); 1810 %} 1811 ins_pipe(pipe_slow); 1812 %} 1813 1814 // =================================EVEX special=============================== 1815 1816 instruct setMask(rRegI dst, rRegI src) %{ 1817 predicate(Matcher::has_predicated_vectors()); 1818 match(Set dst (SetVectMaskI src)); 1819 effect(TEMP dst); 1820 format %{ "setvectmask $dst, $src" %} 1821 ins_encode %{ 1822 __ setvectmask($dst$$Register, $src$$Register); 1823 %} 1824 ins_pipe(pipe_slow); 1825 %} 1826 1827 // ============================================================================ 1828 1829 instruct addF_reg(regF dst, regF src) %{ 1830 predicate((UseSSE>=1) && (UseAVX == 0)); 1831 match(Set dst (AddF dst src)); 1832 1833 format %{ "addss $dst, $src" %} 1834 ins_cost(150); 1835 ins_encode %{ 1836 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1837 %} 1838 ins_pipe(pipe_slow); 1839 %} 1840 1841 instruct addF_mem(regF dst, memory src) %{ 1842 predicate((UseSSE>=1) && (UseAVX == 0)); 1843 match(Set dst (AddF dst (LoadF src))); 1844 1845 format %{ "addss $dst, $src" %} 1846 ins_cost(150); 1847 ins_encode %{ 1848 __ addss($dst$$XMMRegister, $src$$Address); 1849 %} 1850 ins_pipe(pipe_slow); 1851 %} 1852 1853 instruct addF_imm(regF dst, immF con) %{ 1854 predicate((UseSSE>=1) && (UseAVX == 0)); 1855 match(Set dst (AddF dst con)); 1856 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1857 ins_cost(150); 1858 ins_encode %{ 1859 __ addss($dst$$XMMRegister, $constantaddress($con)); 1860 %} 1861 ins_pipe(pipe_slow); 1862 %} 1863 1864 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1865 predicate(UseAVX > 0); 1866 match(Set dst (AddF src1 src2)); 1867 1868 format %{ "vaddss $dst, $src1, $src2" %} 1869 ins_cost(150); 1870 ins_encode %{ 1871 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1872 %} 1873 ins_pipe(pipe_slow); 1874 %} 1875 1876 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1877 predicate(UseAVX > 0); 1878 match(Set dst (AddF src1 (LoadF src2))); 1879 1880 format %{ "vaddss $dst, $src1, $src2" %} 1881 ins_cost(150); 1882 ins_encode %{ 1883 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1884 %} 1885 ins_pipe(pipe_slow); 1886 %} 1887 1888 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1889 predicate(UseAVX > 0); 1890 match(Set dst (AddF src con)); 1891 1892 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1893 ins_cost(150); 1894 ins_encode %{ 1895 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1896 %} 1897 ins_pipe(pipe_slow); 1898 %} 1899 1900 instruct addD_reg(regD dst, regD src) %{ 1901 predicate((UseSSE>=2) && (UseAVX == 0)); 1902 match(Set dst (AddD dst src)); 1903 1904 format %{ "addsd $dst, $src" %} 1905 ins_cost(150); 1906 ins_encode %{ 1907 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1908 %} 1909 ins_pipe(pipe_slow); 1910 %} 1911 1912 instruct addD_mem(regD dst, memory src) %{ 1913 predicate((UseSSE>=2) && (UseAVX == 0)); 1914 match(Set dst (AddD dst (LoadD src))); 1915 1916 
format %{ "addsd $dst, $src" %} 1917 ins_cost(150); 1918 ins_encode %{ 1919 __ addsd($dst$$XMMRegister, $src$$Address); 1920 %} 1921 ins_pipe(pipe_slow); 1922 %} 1923 1924 instruct addD_imm(regD dst, immD con) %{ 1925 predicate((UseSSE>=2) && (UseAVX == 0)); 1926 match(Set dst (AddD dst con)); 1927 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1928 ins_cost(150); 1929 ins_encode %{ 1930 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1931 %} 1932 ins_pipe(pipe_slow); 1933 %} 1934 1935 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1936 predicate(UseAVX > 0); 1937 match(Set dst (AddD src1 src2)); 1938 1939 format %{ "vaddsd $dst, $src1, $src2" %} 1940 ins_cost(150); 1941 ins_encode %{ 1942 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1943 %} 1944 ins_pipe(pipe_slow); 1945 %} 1946 1947 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1948 predicate(UseAVX > 0); 1949 match(Set dst (AddD src1 (LoadD src2))); 1950 1951 format %{ "vaddsd $dst, $src1, $src2" %} 1952 ins_cost(150); 1953 ins_encode %{ 1954 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1955 %} 1956 ins_pipe(pipe_slow); 1957 %} 1958 1959 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1960 predicate(UseAVX > 0); 1961 match(Set dst (AddD src con)); 1962 1963 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1964 ins_cost(150); 1965 ins_encode %{ 1966 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1967 %} 1968 ins_pipe(pipe_slow); 1969 %} 1970 1971 instruct subF_reg(regF dst, regF src) %{ 1972 predicate((UseSSE>=1) && (UseAVX == 0)); 1973 match(Set dst (SubF dst src)); 1974 1975 format %{ "subss $dst, $src" %} 1976 ins_cost(150); 1977 ins_encode %{ 1978 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1979 %} 1980 ins_pipe(pipe_slow); 1981 %} 1982 1983 instruct subF_mem(regF dst, memory src) %{ 1984 predicate((UseSSE>=1) && (UseAVX == 0)); 1985 match(Set dst (SubF dst (LoadF src))); 1986 1987 format %{ "subss $dst, $src" %} 1988 ins_cost(150); 1989 ins_encode %{ 1990 __ subss($dst$$XMMRegister, $src$$Address); 1991 %} 1992 ins_pipe(pipe_slow); 1993 %} 1994 1995 instruct subF_imm(regF dst, immF con) %{ 1996 predicate((UseSSE>=1) && (UseAVX == 0)); 1997 match(Set dst (SubF dst con)); 1998 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1999 ins_cost(150); 2000 ins_encode %{ 2001 __ subss($dst$$XMMRegister, $constantaddress($con)); 2002 %} 2003 ins_pipe(pipe_slow); 2004 %} 2005 2006 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2007 predicate(UseAVX > 0); 2008 match(Set dst (SubF src1 src2)); 2009 2010 format %{ "vsubss $dst, $src1, $src2" %} 2011 ins_cost(150); 2012 ins_encode %{ 2013 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2014 %} 2015 ins_pipe(pipe_slow); 2016 %} 2017 2018 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2019 predicate(UseAVX > 0); 2020 match(Set dst (SubF src1 (LoadF src2))); 2021 2022 format %{ "vsubss $dst, $src1, $src2" %} 2023 ins_cost(150); 2024 ins_encode %{ 2025 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2026 %} 2027 ins_pipe(pipe_slow); 2028 %} 2029 2030 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2031 predicate(UseAVX > 0); 2032 match(Set dst (SubF src con)); 2033 2034 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2035 ins_cost(150); 2036 ins_encode %{ 2037 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2038 %} 2039 ins_pipe(pipe_slow); 2040 %} 2041 2042 instruct subD_reg(regD dst, regD src) %{ 2043 predicate((UseSSE>=2) && (UseAVX == 0)); 2044 match(Set dst (SubD dst src)); 2045 2046 format %{ "subsd $dst, $src" %} 2047 ins_cost(150); 2048 ins_encode %{ 2049 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2050 %} 2051 ins_pipe(pipe_slow); 2052 %} 2053 2054 instruct subD_mem(regD dst, memory src) %{ 2055 predicate((UseSSE>=2) && (UseAVX == 0)); 2056 match(Set dst (SubD dst (LoadD src))); 2057 2058 format %{ "subsd $dst, $src" %} 2059 ins_cost(150); 2060 ins_encode %{ 2061 __ subsd($dst$$XMMRegister, $src$$Address); 2062 %} 2063 ins_pipe(pipe_slow); 2064 %} 2065 2066 instruct subD_imm(regD dst, immD con) %{ 2067 predicate((UseSSE>=2) && (UseAVX == 0)); 2068 match(Set dst (SubD dst con)); 2069 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2070 ins_cost(150); 2071 ins_encode %{ 2072 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2073 %} 2074 ins_pipe(pipe_slow); 2075 %} 2076 2077 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2078 predicate(UseAVX > 0); 2079 match(Set dst (SubD src1 src2)); 2080 2081 format %{ "vsubsd $dst, $src1, $src2" %} 2082 ins_cost(150); 2083 ins_encode %{ 2084 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2085 %} 2086 ins_pipe(pipe_slow); 2087 %} 2088 2089 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2090 predicate(UseAVX > 0); 2091 match(Set dst (SubD src1 (LoadD src2))); 2092 2093 format %{ "vsubsd $dst, $src1, $src2" %} 2094 ins_cost(150); 2095 ins_encode %{ 2096 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2097 %} 2098 ins_pipe(pipe_slow); 2099 %} 2100 2101 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2102 predicate(UseAVX > 0); 2103 match(Set dst (SubD src con)); 2104 2105 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2106 ins_cost(150); 2107 ins_encode %{ 2108 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2109 %} 2110 ins_pipe(pipe_slow); 2111 %} 2112 2113 instruct mulF_reg(regF dst, regF src) %{ 2114 predicate((UseSSE>=1) && (UseAVX == 0)); 2115 match(Set dst (MulF dst src)); 2116 2117 format %{ "mulss $dst, $src" %} 2118 ins_cost(150); 2119 ins_encode %{ 2120 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2121 %} 2122 ins_pipe(pipe_slow); 2123 %} 2124 2125 instruct mulF_mem(regF dst, memory src) %{ 2126 predicate((UseSSE>=1) && (UseAVX == 0)); 2127 match(Set dst (MulF dst (LoadF src))); 2128 2129 format %{ "mulss $dst, $src" %} 2130 ins_cost(150); 2131 ins_encode %{ 2132 __ mulss($dst$$XMMRegister, $src$$Address); 2133 %} 2134 ins_pipe(pipe_slow); 2135 %} 2136 2137 instruct mulF_imm(regF dst, immF con) %{ 2138 predicate((UseSSE>=1) && (UseAVX == 0)); 2139 match(Set dst (MulF dst con)); 2140 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2141 ins_cost(150); 2142 ins_encode %{ 2143 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2144 %} 2145 ins_pipe(pipe_slow); 2146 %} 2147 2148 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2149 predicate(UseAVX > 0); 2150 match(Set dst (MulF src1 src2)); 2151 2152 format %{ "vmulss $dst, $src1, $src2" %} 2153 ins_cost(150); 2154 ins_encode %{ 2155 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2156 %} 2157 ins_pipe(pipe_slow); 2158 %} 2159 2160 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2161 predicate(UseAVX > 0); 2162 match(Set dst (MulF src1 (LoadF src2))); 2163 2164 format %{ "vmulss $dst, $src1, $src2" %} 2165 ins_cost(150); 2166 ins_encode %{ 2167 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2168 %} 2169 ins_pipe(pipe_slow); 2170 %} 2171 2172 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2173 predicate(UseAVX > 0); 2174 match(Set dst (MulF src con)); 2175 2176 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2177 ins_cost(150); 2178 ins_encode %{ 2179 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2180 %} 2181 ins_pipe(pipe_slow); 2182 %} 2183 2184 instruct mulD_reg(regD dst, regD src) %{ 2185 predicate((UseSSE>=2) && (UseAVX == 0)); 2186 match(Set dst (MulD dst src)); 2187 2188 format %{ "mulsd $dst, $src" %} 2189 ins_cost(150); 2190 ins_encode %{ 2191 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2192 %} 2193 ins_pipe(pipe_slow); 2194 %} 2195 2196 instruct mulD_mem(regD dst, memory src) %{ 2197 predicate((UseSSE>=2) && (UseAVX == 0)); 2198 match(Set dst (MulD dst (LoadD src))); 2199 2200 format %{ "mulsd $dst, $src" %} 2201 ins_cost(150); 2202 ins_encode %{ 2203 __ mulsd($dst$$XMMRegister, $src$$Address); 2204 %} 2205 ins_pipe(pipe_slow); 2206 %} 2207 2208 instruct mulD_imm(regD dst, immD con) %{ 2209 predicate((UseSSE>=2) && (UseAVX == 0)); 2210 match(Set dst (MulD dst con)); 2211 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2212 ins_cost(150); 2213 ins_encode %{ 2214 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2215 %} 2216 ins_pipe(pipe_slow); 2217 %} 2218 2219 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2220 predicate(UseAVX > 0); 2221 match(Set dst (MulD src1 src2)); 2222 2223 format %{ "vmulsd $dst, $src1, $src2" %} 2224 ins_cost(150); 2225 ins_encode %{ 2226 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2227 %} 2228 ins_pipe(pipe_slow); 2229 %} 2230 2231 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2232 predicate(UseAVX > 0); 2233 match(Set dst (MulD src1 (LoadD src2))); 2234 2235 format %{ "vmulsd $dst, $src1, $src2" %} 2236 ins_cost(150); 2237 ins_encode %{ 2238 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2239 %} 2240 ins_pipe(pipe_slow); 2241 %} 2242 2243 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2244 predicate(UseAVX > 0); 2245 match(Set dst (MulD src con)); 2246 2247 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2248 ins_cost(150); 2249 ins_encode %{ 2250 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2251 %} 2252 ins_pipe(pipe_slow); 2253 %} 2254 2255 instruct divF_reg(regF dst, regF src) %{ 2256 predicate((UseSSE>=1) && (UseAVX == 0)); 2257 match(Set dst (DivF dst src)); 2258 2259 format %{ "divss $dst, $src" %} 2260 ins_cost(150); 2261 ins_encode %{ 2262 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2263 %} 2264 ins_pipe(pipe_slow); 2265 %} 2266 2267 instruct divF_mem(regF dst, memory src) %{ 2268 predicate((UseSSE>=1) && (UseAVX == 0)); 2269 match(Set dst (DivF dst (LoadF src))); 2270 2271 format %{ "divss $dst, $src" %} 2272 ins_cost(150); 2273 ins_encode %{ 2274 __ divss($dst$$XMMRegister, $src$$Address); 2275 %} 2276 ins_pipe(pipe_slow); 2277 %} 2278 2279 instruct divF_imm(regF dst, immF con) %{ 2280 predicate((UseSSE>=1) && (UseAVX == 0)); 2281 match(Set dst (DivF dst con)); 2282 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2283 ins_cost(150); 2284 ins_encode %{ 2285 __ divss($dst$$XMMRegister, $constantaddress($con)); 2286 %} 2287 ins_pipe(pipe_slow); 2288 %} 2289 2290 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2291 predicate(UseAVX > 0); 2292 match(Set dst (DivF src1 src2)); 2293 2294 format %{ "vdivss $dst, $src1, $src2" %} 2295 ins_cost(150); 2296 ins_encode %{ 2297 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2298 %} 2299 ins_pipe(pipe_slow); 2300 %} 2301 2302 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2303 predicate(UseAVX > 0); 2304 match(Set dst (DivF src1 (LoadF src2))); 2305 2306 format %{ "vdivss $dst, $src1, $src2" %} 2307 ins_cost(150); 2308 ins_encode %{ 2309 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2310 %} 2311 ins_pipe(pipe_slow); 2312 %} 2313 2314 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2315 predicate(UseAVX > 0); 2316 match(Set dst (DivF src con)); 2317 2318 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2319 ins_cost(150); 2320 ins_encode %{ 2321 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2322 %} 2323 ins_pipe(pipe_slow); 2324 %} 2325 2326 instruct divD_reg(regD dst, regD src) %{ 2327 predicate((UseSSE>=2) && (UseAVX == 0)); 2328 match(Set dst (DivD dst src)); 2329 2330 format %{ "divsd $dst, $src" %} 2331 ins_cost(150); 2332 ins_encode %{ 2333 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2334 %} 2335 ins_pipe(pipe_slow); 2336 %} 2337 2338 instruct divD_mem(regD dst, memory src) %{ 2339 predicate((UseSSE>=2) && (UseAVX == 0)); 2340 match(Set dst (DivD dst (LoadD src))); 2341 2342 format %{ "divsd $dst, $src" %} 2343 ins_cost(150); 2344 ins_encode %{ 2345 __ divsd($dst$$XMMRegister, $src$$Address); 2346 %} 2347 ins_pipe(pipe_slow); 2348 %} 2349 2350 instruct divD_imm(regD dst, immD con) %{ 2351 predicate((UseSSE>=2) && (UseAVX == 0)); 2352 match(Set dst (DivD dst con)); 2353 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2354 ins_cost(150); 2355 ins_encode %{ 2356 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2357 %} 2358 ins_pipe(pipe_slow); 2359 %} 2360 2361 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2362 predicate(UseAVX > 0); 2363 match(Set dst (DivD src1 src2)); 2364 2365 format %{ "vdivsd $dst, $src1, $src2" %} 2366 ins_cost(150); 2367 ins_encode %{ 2368 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2374 predicate(UseAVX > 0); 2375 match(Set dst (DivD src1 (LoadD src2))); 2376 2377 format %{ "vdivsd $dst, $src1, $src2" %} 2378 ins_cost(150); 2379 ins_encode %{ 2380 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2381 %} 2382 ins_pipe(pipe_slow); 2383 %} 2384 2385 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2386 predicate(UseAVX > 0); 2387 match(Set dst (DivD src con)); 2388 2389 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2390 ins_cost(150); 2391 ins_encode %{ 2392 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2393 %} 2394 ins_pipe(pipe_slow); 2395 %} 2396 2397 instruct absF_reg(regF dst) %{ 2398 predicate((UseSSE>=1) && (UseAVX == 0)); 2399 match(Set dst (AbsF dst)); 2400 ins_cost(150); 2401 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double
by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst
(SqrtD (LoadD src))); 2631 2632 format %{ "sqrtsd $dst, $src" %} 2633 ins_cost(150); 2634 ins_encode %{ 2635 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2636 %} 2637 ins_pipe(pipe_slow); 2638 %} 2639 2640 instruct sqrtD_imm(regD dst, immD con) %{ 2641 predicate(UseSSE>=2); 2642 match(Set dst (SqrtD con)); 2643 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2644 ins_cost(150); 2645 ins_encode %{ 2646 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2647 %} 2648 ins_pipe(pipe_slow); 2649 %} 2650 2651 instruct onspinwait() %{ 2652 match(OnSpinWait); 2653 ins_cost(200); 2654 2655 format %{ 2656 $$template 2657 if (os::is_MP()) { 2658 $$emit$$"pause\t! membar_onspinwait" 2659 } else { 2660 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2661 } 2662 %} 2663 ins_encode %{ 2664 __ pause(); 2665 %} 2666 ins_pipe(pipe_slow); 2667 %} 2668 2669 // a * b + c 2670 instruct fmaD_reg(regD a, regD b, regD c) %{ 2671 predicate(UseFMA); 2672 match(Set c (FmaD c (Binary a b))); 2673 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2674 ins_cost(150); 2675 ins_encode %{ 2676 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2677 %} 2678 ins_pipe( pipe_slow ); 2679 %} 2680 2681 // a * b + c 2682 instruct fmaF_reg(regF a, regF b, regF c) %{ 2683 predicate(UseFMA); 2684 match(Set c (FmaF c (Binary a b))); 2685 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2686 ins_cost(150); 2687 ins_encode %{ 2688 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2689 %} 2690 ins_pipe( pipe_slow ); 2691 %} 2692 2693 // ====================VECTOR INSTRUCTIONS===================================== 2694 2695 // Load vectors (4 bytes long) 2696 instruct loadV4(vecS dst, memory mem) %{ 2697 predicate(n->as_LoadVector()->memory_size() == 4); 2698 match(Set dst (LoadVector mem)); 2699 ins_cost(125); 2700 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2701 ins_encode %{ 2702 __ movdl($dst$$XMMRegister, $mem$$Address); 2703 %} 2704 ins_pipe( pipe_slow ); 2705 %} 2706 2707 // Load vectors (8 bytes long) 2708 instruct loadV8(vecD dst, memory mem) %{ 2709 predicate(n->as_LoadVector()->memory_size() == 8); 2710 match(Set dst (LoadVector mem)); 2711 ins_cost(125); 2712 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2713 ins_encode %{ 2714 __ movq($dst$$XMMRegister, $mem$$Address); 2715 %} 2716 ins_pipe( pipe_slow ); 2717 %} 2718 2719 // Load vectors (16 bytes long) 2720 instruct loadV16(vecX dst, memory mem) %{ 2721 predicate(n->as_LoadVector()->memory_size() == 16); 2722 match(Set dst (LoadVector mem)); 2723 ins_cost(125); 2724 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2725 ins_encode %{ 2726 __ movdqu($dst$$XMMRegister, $mem$$Address); 2727 %} 2728 ins_pipe( pipe_slow ); 2729 %} 2730 2731 // Load vectors (32 bytes long) 2732 instruct loadV32(vecY dst, memory mem) %{ 2733 predicate(n->as_LoadVector()->memory_size() == 32); 2734 match(Set dst (LoadVector mem)); 2735 ins_cost(125); 2736 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2737 ins_encode %{ 2738 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2739 %} 2740 ins_pipe( pipe_slow ); 2741 %} 2742 2743 // Load vectors (64 bytes long) 2744 instruct loadV64_dword(vecZ dst, memory mem) %{ 2745 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 2746 match(Set dst (LoadVector mem)); 2747 ins_cost(125); 2748 format %{ "vmovdqul $dst k0,$mem\t! 
load vector (64 bytes)" %} 2749 ins_encode %{ 2750 int vector_len = 2; 2751 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 2752 %} 2753 ins_pipe( pipe_slow ); 2754 %} 2755 2756 // Load vectors (64 bytes long) 2757 instruct loadV64_qword(vecZ dst, memory mem) %{ 2758 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 2759 match(Set dst (LoadVector mem)); 2760 ins_cost(125); 2761 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 2762 ins_encode %{ 2763 int vector_len = 2; 2764 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 2765 %} 2766 ins_pipe( pipe_slow ); 2767 %} 2768 2769 // Store vectors 2770 instruct storeV4(memory mem, vecS src) %{ 2771 predicate(n->as_StoreVector()->memory_size() == 4); 2772 match(Set mem (StoreVector mem src)); 2773 ins_cost(145); 2774 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2775 ins_encode %{ 2776 __ movdl($mem$$Address, $src$$XMMRegister); 2777 %} 2778 ins_pipe( pipe_slow ); 2779 %} 2780 2781 instruct storeV8(memory mem, vecD src) %{ 2782 predicate(n->as_StoreVector()->memory_size() == 8); 2783 match(Set mem (StoreVector mem src)); 2784 ins_cost(145); 2785 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 2786 ins_encode %{ 2787 __ movq($mem$$Address, $src$$XMMRegister); 2788 %} 2789 ins_pipe( pipe_slow ); 2790 %} 2791 2792 instruct storeV16(memory mem, vecX src) %{ 2793 predicate(n->as_StoreVector()->memory_size() == 16); 2794 match(Set mem (StoreVector mem src)); 2795 ins_cost(145); 2796 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 2797 ins_encode %{ 2798 __ movdqu($mem$$Address, $src$$XMMRegister); 2799 %} 2800 ins_pipe( pipe_slow ); 2801 %} 2802 2803 instruct storeV32(memory mem, vecY src) %{ 2804 predicate(n->as_StoreVector()->memory_size() == 32); 2805 match(Set mem (StoreVector mem src)); 2806 ins_cost(145); 2807 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2808 ins_encode %{ 2809 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2810 %} 2811 ins_pipe( pipe_slow ); 2812 %} 2813 2814 instruct storeV64_dword(memory mem, vecZ src) %{ 2815 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 2816 match(Set mem (StoreVector mem src)); 2817 ins_cost(145); 2818 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 2819 ins_encode %{ 2820 int vector_len = 2; 2821 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 2822 %} 2823 ins_pipe( pipe_slow ); 2824 %} 2825 2826 instruct storeV64_qword(memory mem, vecZ src) %{ 2827 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 2828 match(Set mem (StoreVector mem src)); 2829 ins_cost(145); 2830 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 2831 ins_encode %{ 2832 int vector_len = 2; 2833 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 2834 %} 2835 ins_pipe( pipe_slow ); 2836 %} 2837 2838 // ====================LEGACY REPLICATE======================================= 2839 2840 instruct Repl4B_mem(vecS dst, memory mem) %{ 2841 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2842 match(Set dst (ReplicateB (LoadB mem))); 2843 format %{ "punpcklbw $dst,$mem\n\t" 2844 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 2845 ins_encode %{ 2846 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2847 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2848 %} 2849 ins_pipe( pipe_slow ); 2850 %} 2851 2852 instruct Repl8B_mem(vecD dst, memory mem) %{ 2853 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2854 match(Set dst (ReplicateB (LoadB mem))); 2855 format %{ "punpcklbw $dst,$mem\n\t" 2856 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 2857 ins_encode %{ 2858 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2859 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2860 %} 2861 ins_pipe( pipe_slow ); 2862 %} 2863 2864 instruct Repl16B(vecX dst, rRegI src) %{ 2865 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2866 match(Set dst (ReplicateB src)); 2867 format %{ "movd $dst,$src\n\t" 2868 "punpcklbw $dst,$dst\n\t" 2869 "pshuflw $dst,$dst,0x00\n\t" 2870 "punpcklqdq $dst,$dst\t! replicate16B" %} 2871 ins_encode %{ 2872 __ movdl($dst$$XMMRegister, $src$$Register); 2873 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2874 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2875 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2876 %} 2877 ins_pipe( pipe_slow ); 2878 %} 2879 2880 instruct Repl16B_mem(vecX dst, memory mem) %{ 2881 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2882 match(Set dst (ReplicateB (LoadB mem))); 2883 format %{ "punpcklbw $dst,$mem\n\t" 2884 "pshuflw $dst,$dst,0x00\n\t" 2885 "punpcklqdq $dst,$dst\t! replicate16B" %} 2886 ins_encode %{ 2887 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2888 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2889 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2890 %} 2891 ins_pipe( pipe_slow ); 2892 %} 2893 2894 instruct Repl32B(vecY dst, rRegI src) %{ 2895 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2896 match(Set dst (ReplicateB src)); 2897 format %{ "movd $dst,$src\n\t" 2898 "punpcklbw $dst,$dst\n\t" 2899 "pshuflw $dst,$dst,0x00\n\t" 2900 "punpcklqdq $dst,$dst\n\t" 2901 "vinserti128_high $dst,$dst\t! replicate32B" %} 2902 ins_encode %{ 2903 __ movdl($dst$$XMMRegister, $src$$Register); 2904 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 2905 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2906 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2907 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2908 %} 2909 ins_pipe( pipe_slow ); 2910 %} 2911 2912 instruct Repl32B_mem(vecY dst, memory mem) %{ 2913 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 2914 match(Set dst (ReplicateB (LoadB mem))); 2915 format %{ "punpcklbw $dst,$mem\n\t" 2916 "pshuflw $dst,$dst,0x00\n\t" 2917 "punpcklqdq $dst,$dst\n\t" 2918 "vinserti128_high $dst,$dst\t! replicate32B" %} 2919 ins_encode %{ 2920 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2921 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2922 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2923 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 2924 %} 2925 ins_pipe( pipe_slow ); 2926 %} 2927 2928 instruct Repl16B_imm(vecX dst, immI con) %{ 2929 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 2930 match(Set dst (ReplicateB con)); 2931 format %{ "movq $dst,[$constantaddress]\n\t" 2932 "punpcklqdq $dst,$dst\t! 
instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// A long value can be loaded into an xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
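// The Repl*_imm rules pre-fold the broadcast at compile time: the
// replicate4_imm/replicate8_imm helpers tile the immediate across a 4- or
// 8-byte constant that is then loaded from the constant table. A minimal
// sketch of that tiling, assuming 'val' is the immediate and 'size' its
// width in bytes (illustrative, not the HotSpot helper itself):
//
//   static uint64_t replicate8_imm_sketch(uint64_t val, int size) {
//     uint64_t bits = (size == 8) ? val
//                                 : (val & ((UINT64_C(1) << (size * 8)) - 1));
//     uint64_t result = 0;
//     for (int i = 0; i < 8 / size; i++) {
//       result |= bits << (i * size * 8);   // tile the low 'size' bytes
//     }
//     return result;   // e.g. (0x41, 1) -> 0x4141414141414141
//   }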
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// An integer can be loaded into an xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
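
// The "Use vxorpd ..." comments in the zero rules above refer to the
// MacroAssembler wrapper that the __ vpxor calls go through: on plain AVX
// (no AVX2) there is no 256-bit integer vpxor, so the wrapper is expected
// to fall back to a floating-point xor of the same width. A minimal sketch
// of that dispatch, under that assumption (not the actual wrapper):
//
//   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vlen) {
//     if (UseAVX > 1 || vlen < 1)
//       Assembler::vpxor(dst, nds, src, vlen);    // AVX2, or the 128-bit form
//     else
//       Assembler::vxorpd(dst, nds, src, vlen);   // AVX1 256-bit fallback
//   }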
// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================
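
// Throughout the EVEX rules below, vector_len encodes the operand width
// passed to the assembler:
//
//   int vector_len = 0;   // xmm, 128-bit (16 bytes)
//   int vector_len = 1;   // ymm, 256-bit (32 bytes)
//   int vector_len = 2;   // zmm, 512-bit (64 bytes)
//
// The predicates mirror the hardware split: supports_avx512vl() gates the
// 128/256-bit EVEX forms (the VL extension), the *vlbw variants additionally
// require byte/word support (the BW extension), and the full 512-bit rules
// need only avx512bw / UseAVX > 2.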
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
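
// On 32-bit (#else branch above) a long lives in a register pair, so the
// rules first assemble one 64-bit lane from the two halves before
// broadcasting. A scalar sketch of that assembly step (illustrative only):
//
//   uint64_t make_lane(uint32_t lo, uint32_t hi) {
//     // movdl dst,lo; movdl tmp,hi; punpckldq dst,tmp
//     return ((uint64_t) hi << 32) | lo;
//   }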

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX-encoded vxorps requires AVX512DQ; this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX-encoded vxorps requires AVX512DQ; this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX-encoded vxorps requires AVX512DQ; this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX-encoded vxorps requires AVX512DQ; this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX-encoded vxorpd requires AVX512DQ; this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX-encoded vxorpd requires AVX512DQ; this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX-encoded vxorpd requires AVX512DQ; this is a 512-bit operation.
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================REDUCTION ARITHMETIC=======================================
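
// The AddReductionVI rules below sum every lane of the vector input into
// the scalar input and produce a scalar result. As a scalar sketch of the
// intended semantics only (illustrative names, not HotSpot code):
//
//   int add_reduction_vi(int src1, const int* src2, int len) {
//     int sum = src1;
//     for (int i = 0; i < len; i++) {
//       sum += src2[i];   // phaddd/vpaddd fold the lanes pairwise instead
//     }
//     return sum;
//   }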
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
add reduction4I" %} 4494 ins_encode %{ 4495 int vector_len = 0; 4496 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4497 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4498 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4499 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4500 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4501 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4502 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4503 %} 4504 ins_pipe( pipe_slow ); 4505 %} 4506 4507 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4508 predicate(VM_Version::supports_avxonly()); 4509 match(Set dst (AddReductionVI src1 src2)); 4510 effect(TEMP tmp, TEMP tmp2); 4511 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4512 "vphaddd $tmp,$tmp,$tmp2\n\t" 4513 "vextracti128_high $tmp2,$tmp\n\t" 4514 "vpaddd $tmp,$tmp,$tmp2\n\t" 4515 "movd $tmp2,$src1\n\t" 4516 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4517 "movd $dst,$tmp2\t! add reduction8I" %} 4518 ins_encode %{ 4519 int vector_len = 1; 4520 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4521 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4522 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4523 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4524 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4525 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4526 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4527 %} 4528 ins_pipe( pipe_slow ); 4529 %} 4530 4531 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4532 predicate(UseAVX > 2); 4533 match(Set dst (AddReductionVI src1 src2)); 4534 effect(TEMP tmp, TEMP tmp2); 4535 format %{ "vextracti128_high $tmp,$src2\n\t" 4536 "vpaddd $tmp,$tmp,$src2\n\t" 4537 "pshufd $tmp2,$tmp,0xE\n\t" 4538 "vpaddd $tmp,$tmp,$tmp2\n\t" 4539 "pshufd $tmp2,$tmp,0x1\n\t" 4540 "vpaddd $tmp,$tmp,$tmp2\n\t" 4541 "movd $tmp2,$src1\n\t" 4542 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4543 "movd $dst,$tmp2\t! add reduction8I" %} 4544 ins_encode %{ 4545 int vector_len = 0; 4546 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4547 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4548 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4549 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4550 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4551 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4552 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4553 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4554 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4555 %} 4556 ins_pipe( pipe_slow ); 4557 %} 4558 4559 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4560 predicate(UseAVX > 2); 4561 match(Set dst (AddReductionVI src1 src2)); 4562 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4563 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4564 "vpaddd $tmp3,$tmp3,$src2\n\t" 4565 "vextracti128_high $tmp,$tmp3\n\t" 4566 "vpaddd $tmp,$tmp,$tmp3\n\t" 4567 "pshufd $tmp2,$tmp,0xE\n\t" 4568 "vpaddd $tmp,$tmp,$tmp2\n\t" 4569 "pshufd $tmp2,$tmp,0x1\n\t" 4570 "vpaddd $tmp,$tmp,$tmp2\n\t" 4571 "movd $tmp2,$src1\n\t" 4572 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4573 "movd $dst,$tmp2\t! 
add reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
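
// Note: the integer and long reduction rules above fold the vector by repeated
// halving -- extract the upper half, combine it with the lower half, then
// reduce the remaining 128-bit lane with ever narrower shuffles -- and finally
// merge in the scalar input (src1); the SSE-only forms use horizontal adds
// (phaddd) instead. The floating-point reductions below must instead
// accumulate the lanes strictly in order into dst, because FP add and multiply
// are not associative and the result has to match the sequential scalar loop.
// Scalar equivalent, as an illustrative sketch only (assumed helper, not part
// of this file):
//
//   float add_reduction_float(float acc, const float* lane, int n) {
//     for (int i = 0; i < n; i++) {
//       acc += lane[i];   // one addss/vaddss per lane, left to right
//     }
//     return acc;
//   }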

instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "addss $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 4725 ins_encode %{ 4726 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4727 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4728 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4729 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4730 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4731 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4732 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4733 %} 4734 ins_pipe( pipe_slow ); 4735 %} 4736 4737 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 4738 predicate(UseAVX > 0); 4739 match(Set dst (AddReductionVF dst src2)); 4740 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4741 format %{ "vaddss $dst,$dst,$src2\n\t" 4742 "pshufd $tmp,$src2,0x01\n\t" 4743 "vaddss $dst,$dst,$tmp\n\t" 4744 "pshufd $tmp,$src2,0x02\n\t" 4745 "vaddss $dst,$dst,$tmp\n\t" 4746 "pshufd $tmp,$src2,0x03\n\t" 4747 "vaddss $dst,$dst,$tmp\n\t" 4748 "vextractf128_high $tmp2,$src2\n\t" 4749 "vaddss $dst,$dst,$tmp2\n\t" 4750 "pshufd $tmp,$tmp2,0x01\n\t" 4751 "vaddss $dst,$dst,$tmp\n\t" 4752 "pshufd $tmp,$tmp2,0x02\n\t" 4753 "vaddss $dst,$dst,$tmp\n\t" 4754 "pshufd $tmp,$tmp2,0x03\n\t" 4755 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 4756 ins_encode %{ 4757 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4758 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4759 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4760 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4761 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4762 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4763 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4764 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4765 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4766 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4767 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4768 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4769 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4770 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4771 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4772 %} 4773 ins_pipe( pipe_slow ); 4774 %} 4775 4776 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 4777 predicate(UseAVX > 2); 4778 match(Set dst (AddReductionVF dst src2)); 4779 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4780 format %{ "vaddss $dst,$dst,$src2\n\t" 4781 "pshufd $tmp,$src2,0x01\n\t" 4782 "vaddss $dst,$dst,$tmp\n\t" 4783 "pshufd $tmp,$src2,0x02\n\t" 4784 "vaddss $dst,$dst,$tmp\n\t" 4785 "pshufd $tmp,$src2,0x03\n\t" 4786 "vaddss $dst,$dst,$tmp\n\t" 4787 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4788 "vaddss $dst,$dst,$tmp2\n\t" 4789 "pshufd $tmp,$tmp2,0x01\n\t" 4790 "vaddss $dst,$dst,$tmp\n\t" 4791 "pshufd $tmp,$tmp2,0x02\n\t" 4792 "vaddss $dst,$dst,$tmp\n\t" 4793 "pshufd $tmp,$tmp2,0x03\n\t" 4794 "vaddss $dst,$dst,$tmp\n\t" 4795 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4796 "vaddss $dst,$dst,$tmp2\n\t" 4797 "pshufd $tmp,$tmp2,0x01\n\t" 4798 "vaddss $dst,$dst,$tmp\n\t" 4799 "pshufd $tmp,$tmp2,0x02\n\t" 4800 "vaddss $dst,$dst,$tmp\n\t" 4801 "pshufd $tmp,$tmp2,0x03\n\t" 4802 "vaddss $dst,$dst,$tmp\n\t" 4803 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4804 "vaddss $dst,$dst,$tmp2\n\t" 4805 "pshufd $tmp,$tmp2,0x01\n\t" 4806 "vaddss $dst,$dst,$tmp\n\t" 4807 "pshufd 
$tmp,$tmp2,0x02\n\t" 4808 "vaddss $dst,$dst,$tmp\n\t" 4809 "pshufd $tmp,$tmp2,0x03\n\t" 4810 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 4811 ins_encode %{ 4812 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4813 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4814 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4815 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4816 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4817 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4818 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4819 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4820 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4821 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4822 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4823 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4824 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4825 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4826 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4827 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4828 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4829 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4830 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4831 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4832 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4833 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4834 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4835 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4836 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4837 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4838 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4839 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4840 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4841 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4842 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4843 %} 4844 ins_pipe( pipe_slow ); 4845 %} 4846 4847 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4848 predicate(UseSSE >= 1 && UseAVX == 0); 4849 match(Set dst (AddReductionVD dst src2)); 4850 effect(TEMP tmp, TEMP dst); 4851 format %{ "addsd $dst,$src2\n\t" 4852 "pshufd $tmp,$src2,0xE\n\t" 4853 "addsd $dst,$tmp\t! add reduction2D" %} 4854 ins_encode %{ 4855 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 4856 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4857 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 4858 %} 4859 ins_pipe( pipe_slow ); 4860 %} 4861 4862 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 4863 predicate(UseAVX > 0); 4864 match(Set dst (AddReductionVD dst src2)); 4865 effect(TEMP tmp, TEMP dst); 4866 format %{ "vaddsd $dst,$dst,$src2\n\t" 4867 "pshufd $tmp,$src2,0xE\n\t" 4868 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 4869 ins_encode %{ 4870 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4871 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4872 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4873 %} 4874 ins_pipe( pipe_slow ); 4875 %} 4876 4877 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 4878 predicate(UseAVX > 0); 4879 match(Set dst (AddReductionVD dst src2)); 4880 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4881 format %{ "vaddsd $dst,$dst,$src2\n\t" 4882 "pshufd $tmp,$src2,0xE\n\t" 4883 "vaddsd $dst,$dst,$tmp\n\t" 4884 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4885 "vaddsd $dst,$dst,$tmp2\n\t" 4886 "pshufd $tmp,$tmp2,0xE\n\t" 4887 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 4888 ins_encode %{ 4889 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4890 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4891 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4892 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4893 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4894 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4895 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4896 %} 4897 ins_pipe( pipe_slow ); 4898 %} 4899 4900 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 4901 predicate(UseAVX > 2); 4902 match(Set dst (AddReductionVD dst src2)); 4903 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4904 format %{ "vaddsd $dst,$dst,$src2\n\t" 4905 "pshufd $tmp,$src2,0xE\n\t" 4906 "vaddsd $dst,$dst,$tmp\n\t" 4907 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4908 "vaddsd $dst,$dst,$tmp2\n\t" 4909 "pshufd $tmp,$tmp2,0xE\n\t" 4910 "vaddsd $dst,$dst,$tmp\n\t" 4911 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4912 "vaddsd $dst,$dst,$tmp2\n\t" 4913 "pshufd $tmp,$tmp2,0xE\n\t" 4914 "vaddsd $dst,$dst,$tmp\n\t" 4915 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4916 "vaddsd $dst,$dst,$tmp2\n\t" 4917 "pshufd $tmp,$tmp2,0xE\n\t" 4918 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 4919 ins_encode %{ 4920 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4921 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4922 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4923 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4924 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4925 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4926 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4927 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 4928 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4929 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4930 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4931 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 4932 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4933 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4934 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4935 %} 4936 ins_pipe( pipe_slow ); 4937 %} 4938 4939 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4940 predicate(UseSSE > 3 && UseAVX == 0); 4941 match(Set dst (MulReductionVI src1 src2)); 4942 effect(TEMP tmp, TEMP tmp2); 4943 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4944 "pmulld $tmp2,$src2\n\t" 4945 "movd $tmp,$src1\n\t" 4946 "pmulld $tmp2,$tmp\n\t" 4947 "movd $dst,$tmp2\t! 
mul reduction2I" %} 4948 ins_encode %{ 4949 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4950 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4951 __ movdl($tmp$$XMMRegister, $src1$$Register); 4952 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4953 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4954 %} 4955 ins_pipe( pipe_slow ); 4956 %} 4957 4958 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4959 predicate(UseAVX > 0); 4960 match(Set dst (MulReductionVI src1 src2)); 4961 effect(TEMP tmp, TEMP tmp2); 4962 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4963 "vpmulld $tmp,$src2,$tmp2\n\t" 4964 "movd $tmp2,$src1\n\t" 4965 "vpmulld $tmp2,$tmp,$tmp2\n\t" 4966 "movd $dst,$tmp2\t! mul reduction2I" %} 4967 ins_encode %{ 4968 int vector_len = 0; 4969 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4970 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4971 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4972 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4973 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4974 %} 4975 ins_pipe( pipe_slow ); 4976 %} 4977 4978 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4979 predicate(UseSSE > 3 && UseAVX == 0); 4980 match(Set dst (MulReductionVI src1 src2)); 4981 effect(TEMP tmp, TEMP tmp2); 4982 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4983 "pmulld $tmp2,$src2\n\t" 4984 "pshufd $tmp,$tmp2,0x1\n\t" 4985 "pmulld $tmp2,$tmp\n\t" 4986 "movd $tmp,$src1\n\t" 4987 "pmulld $tmp2,$tmp\n\t" 4988 "movd $dst,$tmp2\t! mul reduction4I" %} 4989 ins_encode %{ 4990 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4991 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 4992 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 4993 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4994 __ movdl($tmp$$XMMRegister, $src1$$Register); 4995 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 4996 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4997 %} 4998 ins_pipe( pipe_slow ); 4999 %} 5000 5001 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5002 predicate(UseAVX > 0); 5003 match(Set dst (MulReductionVI src1 src2)); 5004 effect(TEMP tmp, TEMP tmp2); 5005 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5006 "vpmulld $tmp,$src2,$tmp2\n\t" 5007 "pshufd $tmp2,$tmp,0x1\n\t" 5008 "vpmulld $tmp,$tmp,$tmp2\n\t" 5009 "movd $tmp2,$src1\n\t" 5010 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5011 "movd $dst,$tmp2\t! 
mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! 
mul reduction16I" %} 5068 ins_encode %{ 5069 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5070 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5071 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5072 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5073 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5074 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5075 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5076 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5077 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5078 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5079 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5080 %} 5081 ins_pipe( pipe_slow ); 5082 %} 5083 5084 #ifdef _LP64 5085 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5086 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5087 match(Set dst (MulReductionVL src1 src2)); 5088 effect(TEMP tmp, TEMP tmp2); 5089 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5090 "vpmullq $tmp,$src2,$tmp2\n\t" 5091 "movdq $tmp2,$src1\n\t" 5092 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5093 "movdq $dst,$tmp2\t! mul reduction2L" %} 5094 ins_encode %{ 5095 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5096 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5097 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5098 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5099 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5100 %} 5101 ins_pipe( pipe_slow ); 5102 %} 5103 5104 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5105 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5106 match(Set dst (MulReductionVL src1 src2)); 5107 effect(TEMP tmp, TEMP tmp2); 5108 format %{ "vextracti128_high $tmp,$src2\n\t" 5109 "vpmullq $tmp2,$tmp,$src2\n\t" 5110 "pshufd $tmp,$tmp2,0xE\n\t" 5111 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5112 "movdq $tmp,$src1\n\t" 5113 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5114 "movdq $dst,$tmp2\t! mul reduction4L" %} 5115 ins_encode %{ 5116 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5117 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5118 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5119 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5120 __ movdq($tmp$$XMMRegister, $src1$$Register); 5121 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5122 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5123 %} 5124 ins_pipe( pipe_slow ); 5125 %} 5126 5127 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5128 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5129 match(Set dst (MulReductionVL src1 src2)); 5130 effect(TEMP tmp, TEMP tmp2); 5131 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5132 "vpmullq $tmp2,$tmp2,$src2\n\t" 5133 "vextracti128_high $tmp,$tmp2\n\t" 5134 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5135 "pshufd $tmp,$tmp2,0xE\n\t" 5136 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5137 "movdq $tmp,$src1\n\t" 5138 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5139 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5140 ins_encode %{ 5141 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5142 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5143 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5144 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5145 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5146 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5147 __ movdq($tmp$$XMMRegister, $src1$$Register); 5148 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5149 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5150 %} 5151 ins_pipe( pipe_slow ); 5152 %} 5153 #endif 5154 5155 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5156 predicate(UseSSE >= 1 && UseAVX == 0); 5157 match(Set dst (MulReductionVF dst src2)); 5158 effect(TEMP dst, TEMP tmp); 5159 format %{ "mulss $dst,$src2\n\t" 5160 "pshufd $tmp,$src2,0x01\n\t" 5161 "mulss $dst,$tmp\t! mul reduction2F" %} 5162 ins_encode %{ 5163 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5164 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5165 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5166 %} 5167 ins_pipe( pipe_slow ); 5168 %} 5169 5170 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5171 predicate(UseAVX > 0); 5172 match(Set dst (MulReductionVF dst src2)); 5173 effect(TEMP tmp, TEMP dst); 5174 format %{ "vmulss $dst,$dst,$src2\n\t" 5175 "pshufd $tmp,$src2,0x01\n\t" 5176 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5177 ins_encode %{ 5178 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5179 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5180 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5181 %} 5182 ins_pipe( pipe_slow ); 5183 %} 5184 5185 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5186 predicate(UseSSE >= 1 && UseAVX == 0); 5187 match(Set dst (MulReductionVF dst src2)); 5188 effect(TEMP dst, TEMP tmp); 5189 format %{ "mulss $dst,$src2\n\t" 5190 "pshufd $tmp,$src2,0x01\n\t" 5191 "mulss $dst,$tmp\n\t" 5192 "pshufd $tmp,$src2,0x02\n\t" 5193 "mulss $dst,$tmp\n\t" 5194 "pshufd $tmp,$src2,0x03\n\t" 5195 "mulss $dst,$tmp\t! mul reduction4F" %} 5196 ins_encode %{ 5197 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5198 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5199 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5200 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5201 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5202 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5203 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5204 %} 5205 ins_pipe( pipe_slow ); 5206 %} 5207 5208 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5209 predicate(UseAVX > 0); 5210 match(Set dst (MulReductionVF dst src2)); 5211 effect(TEMP tmp, TEMP dst); 5212 format %{ "vmulss $dst,$dst,$src2\n\t" 5213 "pshufd $tmp,$src2,0x01\n\t" 5214 "vmulss $dst,$dst,$tmp\n\t" 5215 "pshufd $tmp,$src2,0x02\n\t" 5216 "vmulss $dst,$dst,$tmp\n\t" 5217 "pshufd $tmp,$src2,0x03\n\t" 5218 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5219 ins_encode %{ 5220 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5221 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5222 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5223 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5224 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5225 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5226 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5227 %} 5228 ins_pipe( pipe_slow ); 5229 %} 5230 5231 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5232 predicate(UseAVX > 0); 5233 match(Set dst (MulReductionVF dst src2)); 5234 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5235 format %{ "vmulss $dst,$dst,$src2\n\t" 5236 "pshufd $tmp,$src2,0x01\n\t" 5237 "vmulss $dst,$dst,$tmp\n\t" 5238 "pshufd $tmp,$src2,0x02\n\t" 5239 "vmulss $dst,$dst,$tmp\n\t" 5240 "pshufd $tmp,$src2,0x03\n\t" 5241 "vmulss $dst,$dst,$tmp\n\t" 5242 "vextractf128_high $tmp2,$src2\n\t" 5243 "vmulss $dst,$dst,$tmp2\n\t" 5244 "pshufd $tmp,$tmp2,0x01\n\t" 5245 "vmulss $dst,$dst,$tmp\n\t" 5246 "pshufd $tmp,$tmp2,0x02\n\t" 5247 "vmulss $dst,$dst,$tmp\n\t" 5248 "pshufd $tmp,$tmp2,0x03\n\t" 5249 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5250 ins_encode %{ 5251 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5252 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5253 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5254 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5255 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5256 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5257 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5258 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5259 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5260 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5261 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5262 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5263 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5264 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5265 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5266 %} 5267 ins_pipe( pipe_slow ); 5268 %} 5269 5270 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5271 predicate(UseAVX > 2); 5272 match(Set dst (MulReductionVF dst src2)); 5273 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5274 format %{ "vmulss $dst,$dst,$src2\n\t" 5275 "pshufd $tmp,$src2,0x01\n\t" 5276 "vmulss $dst,$dst,$tmp\n\t" 5277 "pshufd $tmp,$src2,0x02\n\t" 5278 "vmulss $dst,$dst,$tmp\n\t" 5279 "pshufd $tmp,$src2,0x03\n\t" 5280 "vmulss $dst,$dst,$tmp\n\t" 5281 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5282 "vmulss $dst,$dst,$tmp2\n\t" 5283 "pshufd $tmp,$tmp2,0x01\n\t" 5284 "vmulss $dst,$dst,$tmp\n\t" 5285 "pshufd $tmp,$tmp2,0x02\n\t" 5286 "vmulss $dst,$dst,$tmp\n\t" 5287 "pshufd $tmp,$tmp2,0x03\n\t" 5288 "vmulss $dst,$dst,$tmp\n\t" 5289 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5290 "vmulss $dst,$dst,$tmp2\n\t" 5291 "pshufd $tmp,$tmp2,0x01\n\t" 5292 "vmulss $dst,$dst,$tmp\n\t" 5293 "pshufd $tmp,$tmp2,0x02\n\t" 5294 "vmulss $dst,$dst,$tmp\n\t" 5295 "pshufd $tmp,$tmp2,0x03\n\t" 5296 "vmulss $dst,$dst,$tmp\n\t" 5297 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5298 "vmulss $dst,$dst,$tmp2\n\t" 5299 "pshufd $tmp,$tmp2,0x01\n\t" 5300 "vmulss $dst,$dst,$tmp\n\t" 5301 "pshufd 
$tmp,$tmp2,0x02\n\t" 5302 "vmulss $dst,$dst,$tmp\n\t" 5303 "pshufd $tmp,$tmp2,0x03\n\t" 5304 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5305 ins_encode %{ 5306 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5307 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5308 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5309 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5310 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5311 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5312 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5313 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5314 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5315 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5316 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5317 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5318 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5319 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5320 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5321 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5322 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5323 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5324 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5325 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5326 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5327 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5328 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5329 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5330 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5331 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5332 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5333 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5334 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5335 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5336 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5337 %} 5338 ins_pipe( pipe_slow ); 5339 %} 5340 5341 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5342 predicate(UseSSE >= 1 && UseAVX == 0); 5343 match(Set dst (MulReductionVD dst src2)); 5344 effect(TEMP dst, TEMP tmp); 5345 format %{ "mulsd $dst,$src2\n\t" 5346 "pshufd $tmp,$src2,0xE\n\t" 5347 "mulsd $dst,$tmp\t! mul reduction2D" %} 5348 ins_encode %{ 5349 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5350 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5351 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5352 %} 5353 ins_pipe( pipe_slow ); 5354 %} 5355 5356 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5357 predicate(UseAVX > 0); 5358 match(Set dst (MulReductionVD dst src2)); 5359 effect(TEMP tmp, TEMP dst); 5360 format %{ "vmulsd $dst,$dst,$src2\n\t" 5361 "pshufd $tmp,$src2,0xE\n\t" 5362 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
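
// The packed rules below (and the other vector ops that follow this section)
// come in variants selected by predicate:
//   plain (e.g. vadd4B)  - UseAVX == 0: two-operand SSE form, dst is both
//                          destination and first source
//   *_reg_avx            - VM_Version::supports_avxonly(): three-operand VEX form
//   *_reg_evex           - VM_Version::supports_avx512bw(): three-operand EVEX form
//   *_reg_evex_special   - VM_Version::supports_avx512nobw(): matches e.g.
//                          (Set dst (AddVB dst src2)) so dst doubles as the
//                          first source, with src1 named only as a TEMP
//   *_mem_*              - the same split with the second operand taken
//                          directly from memory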

// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
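
// The _mem variants above match (Set dst (AddVB src (LoadVector mem))),
// folding the vector load into the add so that a single vpaddb with a memory
// operand is emitted instead of a separate load followed by a register add.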

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
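
// In the ins_encode blocks above and below, vector_len selects the SIMD width
// handed to the assembler: 0 -> 128-bit xmm (also used for the 4- and 8-byte
// vecS/vecD cases), 1 -> 256-bit ymm, 2 -> 512-bit zmm. Illustrative mapping
// only (assumed helper, not part of this file):
//
//   int vector_len_for(int vector_size_in_bytes) {
//     if (vector_size_in_bytes <= 16) return 0;  // xmm (vecS/vecD/vecX)
//     if (vector_size_in_bytes <= 32) return 1;  // ymm (vecY)
//     return 2;                                  // zmm (vecZ)
//   }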

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

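// For the int rules the UseAVX level alone selects the width: UseAVX > 0
// gates the 128-bit VEX forms, UseAVX > 1 (AVX2) the 256-bit forms, and
// UseAVX > 2 the 512-bit forms; no avx512bw() test is needed because dword
// adds are part of the AVX-512 foundation set. The 256-bit reg-reg rule
// above reduces to:
//
//   int vector_len = 1;   // ymm
//   __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
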
instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

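// Same ladder for longs: paddq is the SSE2 baseline and the 512-bit form
// only needs UseAVX > 2, since quadword adds are in base AVX-512F (no BW
// feature test, unlike the byte/short groups above):
//
//   int vector_len = 2;   // zmm
//   __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
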
instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

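// Note the weaker predicate on the 256-bit float rules above (UseAVX > 0
// rather than UseAVX > 1): 256-bit packed-single arithmetic such as
// vaddps ymm is already part of AVX1, whereas the 256-bit integer forms
// required AVX2. The 512-bit rules that follow still need UseAVX > 2:
//
//   int vector_len = 2;   // zmm
//   __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
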
instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

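// On pre-AVX targets (UseAVX == 0) only the two-operand SSE shapes at the
// head of each group apply; the match ties the first input to the result,
// e.g. (Set dst (AddVD dst src)), and the encoding is destructive:
//
//   __ addpd($dst$$XMMRegister, $src$$XMMRegister);   // dst += src
//
// The three-operand VEX/EVEX rules free the allocator from that tie.
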
instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

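// As with the adds, the 512-bit byte/short subtracts (vsub64B_* above and
// vsub32S_* further down) are gated on supports_avx512bw() rather than
// plain UseAVX > 2, because vpsubb/vpsubw on zmm registers are AVX512BW
// instructions:
//
//   int vector_len = 2;   // zmm; requires AVX512BW
//   __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
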
instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}


// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
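
// Note: added commentary, not original to this file. The 2S rules above come
// in three flavors, and the same triple repeats for the 4S/8S/16S sizes
// below: an *_avx form (supports_avxonly), an *_evex form (supports_avx512bw),
// and an *_evex_special form for AVX-512 parts without the BW extension
// (supports_avx512nobw), where 16-bit vector instructions like vpmullw have
// no EVEX encoding and must use the legacy VEX form. The special variant
// matches (MulVS dst src2) with effect(TEMP src1), which reads as a
// register-allocation constraint tying the destination to the first source;
// treat that reading as an assumption rather than a documented contract.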

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
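// Note: added commentary, not original to this file. pmulld (packed 32-bit
// low multiply) arrived with SSE4.1, hence the UseSSE > 3 guard on the
// two-operand forms below; the AVX forms use the VEX three-operand encoding
// instead. A hypothetical Java loop shape that SuperWord can reduce to these
// MulVI nodes:
//
//   static void mul(int[] a, int[] b, int[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = a[i] * b[i];
//     }
//   }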
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
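
// Note: added commentary, not original to this file. x86 has no packed
// 64x64->64 multiply before AVX-512: vpmullq is an AVX-512DQ instruction,
// which is why every MulVL rule below also tests
// VM_Version::supports_avx512dq(). A hypothetical Java loop shape:
//
//   static void mul(long[] a, long[] b, long[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = a[i] * b[i];   // matched as MulVL only with AVX-512DQ
//     }
//   }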

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
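
// Note: added commentary, not original to this file. The rule below
// implements the CMoveVD (vector conditional move) ideal node as a
// two-instruction sequence: cmppd writes an all-ones/all-zeros mask per lane,
// and blendvpd then selects each lane from one of the two sources under that
// mask. A hypothetical Java loop shape that can produce CMoveVD:
//
//   static void min(double[] a, double[] b, double[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = (a[i] < b[i]) ? a[i] : b[i];
//     }
//   }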

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
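
// Note: added commentary, not original to this file. Because the SSE/AVX
// shift instructions (psllw/pslld/psllq and friends) read the count from the
// low bits of an XMM register, the single vshiftcnt rule above serves both
// LShiftCntV and RShiftCntV. A hypothetical Java loop shape in which one
// movd of the count feeds every vector shift in the loop:
//
//   static void shl(int[] a, int n) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] <<= n;
//     }
//   }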

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
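
// Note: added commentary, not original to this file. As the section comment
// says, only double-precision SqrtVD rules are defined here; there is no
// packed single-precision sqrt rule at this point in the file. A hypothetical
// Java loop shape that SuperWord can widen to SqrtVD:
//
//   static void sqrt(double[] a, double[] b) {
//     for (int i = 0; i < a.length; i++) {
//       b[i] = Math.sqrt(a[i]);
//     }
//   }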

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int with
// sign extension before the shift. But char vectors are fine since chars are
// unsigned values.
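
// Note: added commentary, not original to this file. The restriction above is
// a Java-semantics issue, not a hardware one; a hypothetical contrast:
//
//   short[] s; char[] c;
//   // Not matched as URShiftVS: s[i] is sign-extended to int before the
//   // shift, so for negative values the int >>> result differs from a
//   // 16-bit psrlw.
//   s[i] = (short)(s[i] >>> 3);
//   // Fine as URShiftVS: char is unsigned, so zero-extension agrees with
//   // the 16-bit logical shift.
//   c[i] = (char)(c[i] >>> 3);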
left shift packed8L" %} 9024 ins_encode %{ 9025 int vector_len = 2; 9026 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9027 %} 9028 ins_pipe( pipe_slow ); 9029 %} 9030 9031 // ----------------------- LogicalRightShift ----------------------------------- 9032 9033 // Shorts vector logical right shift produces incorrect Java result 9034 // for negative data because java code convert short value into int with 9035 // sign extension before a shift. But char vectors are fine since chars are 9036 // unsigned values. 9037 9038 instruct vsrl2S(vecS dst, vecS shift) %{ 9039 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9040 match(Set dst (URShiftVS dst shift)); 9041 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 9042 ins_encode %{ 9043 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9044 %} 9045 ins_pipe( pipe_slow ); 9046 %} 9047 9048 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 9049 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9050 match(Set dst (URShiftVS dst shift)); 9051 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 9052 ins_encode %{ 9053 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9054 %} 9055 ins_pipe( pipe_slow ); 9056 %} 9057 9058 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 9059 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9060 match(Set dst (URShiftVS src shift)); 9061 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9062 ins_encode %{ 9063 int vector_len = 0; 9064 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9065 %} 9066 ins_pipe( pipe_slow ); 9067 %} 9068 9069 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 9070 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9071 match(Set dst (URShiftVS src shift)); 9072 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9073 ins_encode %{ 9074 int vector_len = 0; 9075 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9076 %} 9077 ins_pipe( pipe_slow ); 9078 %} 9079 9080 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 9081 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9082 match(Set dst (URShiftVS dst shift)); 9083 effect(TEMP src); 9084 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9085 ins_encode %{ 9086 int vector_len = 0; 9087 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9088 %} 9089 ins_pipe( pipe_slow ); 9090 %} 9091 9092 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 9093 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9094 match(Set dst (URShiftVS src shift)); 9095 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9096 ins_encode %{ 9097 int vector_len = 0; 9098 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9099 %} 9100 ins_pipe( pipe_slow ); 9101 %} 9102 9103 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9104 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9105 match(Set dst (URShiftVS src shift)); 9106 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed2S" %} 9107 ins_encode %{ 9108 int vector_len = 0; 9109 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9110 %} 9111 ins_pipe( pipe_slow ); 9112 %} 9113 9114 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9115 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9116 match(Set dst (URShiftVS dst shift)); 9117 effect(TEMP src); 9118 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9119 ins_encode %{ 9120 int vector_len = 0; 9121 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9122 %} 9123 ins_pipe( pipe_slow ); 9124 %} 9125 9126 instruct vsrl4S(vecD dst, vecS shift) %{ 9127 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9128 match(Set dst (URShiftVS dst shift)); 9129 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9130 ins_encode %{ 9131 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9132 %} 9133 ins_pipe( pipe_slow ); 9134 %} 9135 9136 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 9137 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9138 match(Set dst (URShiftVS dst shift)); 9139 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9140 ins_encode %{ 9141 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9142 %} 9143 ins_pipe( pipe_slow ); 9144 %} 9145 9146 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9147 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9148 match(Set dst (URShiftVS src shift)); 9149 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9150 ins_encode %{ 9151 int vector_len = 0; 9152 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9153 %} 9154 ins_pipe( pipe_slow ); 9155 %} 9156 9157 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9158 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9159 match(Set dst (URShiftVS src shift)); 9160 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9161 ins_encode %{ 9162 int vector_len = 0; 9163 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9164 %} 9165 ins_pipe( pipe_slow ); 9166 %} 9167 9168 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9169 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9170 match(Set dst (URShiftVS dst shift)); 9171 effect(TEMP src); 9172 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9173 ins_encode %{ 9174 int vector_len = 0; 9175 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9176 %} 9177 ins_pipe( pipe_slow ); 9178 %} 9179 9180 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9181 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9182 match(Set dst (URShiftVS src shift)); 9183 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9184 ins_encode %{ 9185 int vector_len = 0; 9186 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9187 %} 9188 ins_pipe( pipe_slow ); 9189 %} 9190 9191 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9192 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9193 match(Set dst (URShiftVS src shift)); 9194 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} 9195 ins_encode %{ 9196 int vector_len = 0; 9197 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9198 %} 9199 ins_pipe( pipe_slow ); 9200 %} 9201 9202 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9203 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9204 match(Set dst (URShiftVS dst shift)); 9205 effect(TEMP src); 9206 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9207 ins_encode %{ 9208 int vector_len = 0; 9209 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9210 %} 9211 ins_pipe( pipe_slow ); 9212 %} 9213 9214 instruct vsrl8S(vecX dst, vecS shift) %{ 9215 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9216 match(Set dst (URShiftVS dst shift)); 9217 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9218 ins_encode %{ 9219 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9220 %} 9221 ins_pipe( pipe_slow ); 9222 %} 9223 9224 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 9225 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9226 match(Set dst (URShiftVS dst shift)); 9227 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9228 ins_encode %{ 9229 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9230 %} 9231 ins_pipe( pipe_slow ); 9232 %} 9233 9234 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9235 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9236 match(Set dst (URShiftVS src shift)); 9237 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9238 ins_encode %{ 9239 int vector_len = 0; 9240 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9241 %} 9242 ins_pipe( pipe_slow ); 9243 %} 9244 9245 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9246 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9247 match(Set dst (URShiftVS src shift)); 9248 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9249 ins_encode %{ 9250 int vector_len = 0; 9251 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9252 %} 9253 ins_pipe( pipe_slow ); 9254 %} 9255 9256 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9257 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9258 match(Set dst (URShiftVS dst shift)); 9259 effect(TEMP src); 9260 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9261 ins_encode %{ 9262 int vector_len = 0; 9263 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9264 %} 9265 ins_pipe( pipe_slow ); 9266 %} 9267 9268 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9269 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9270 match(Set dst (URShiftVS src shift)); 9271 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9272 ins_encode %{ 9273 int vector_len = 0; 9274 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9275 %} 9276 ins_pipe( pipe_slow ); 9277 %} 9278 9279 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9280 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9281 match(Set dst (URShiftVS src shift)); 9282 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 9283 ins_encode %{ 9284 int vector_len = 0; 9285 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9286 %} 9287 ins_pipe( pipe_slow ); 9288 %} 9289 9290 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9291 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9292 match(Set dst (URShiftVS dst shift)); 9293 effect(TEMP src); 9294 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9295 ins_encode %{ 9296 int vector_len = 0; 9297 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9298 %} 9299 ins_pipe( pipe_slow ); 9300 %} 9301 9302 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9303 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9304 match(Set dst (URShiftVS src shift)); 9305 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9306 ins_encode %{ 9307 int vector_len = 1; 9308 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9309 %} 9310 ins_pipe( pipe_slow ); 9311 %} 9312 9313 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9314 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9315 match(Set dst (URShiftVS src shift)); 9316 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9317 ins_encode %{ 9318 int vector_len = 1; 9319 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9320 %} 9321 ins_pipe( pipe_slow ); 9322 %} 9323 9324 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9325 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9326 match(Set dst (URShiftVS dst shift)); 9327 effect(TEMP src); 9328 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9329 ins_encode %{ 9330 int vector_len = 1; 9331 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9332 %} 9333 ins_pipe( pipe_slow ); 9334 %} 9335 9336 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9337 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9338 match(Set dst (URShiftVS src shift)); 9339 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9340 ins_encode %{ 9341 int vector_len = 1; 9342 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9343 %} 9344 ins_pipe( pipe_slow ); 9345 %} 9346 9347 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9348 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9349 match(Set dst (URShiftVS src shift)); 9350 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9351 ins_encode %{ 9352 int vector_len = 1; 9353 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9354 %} 9355 ins_pipe( pipe_slow ); 9356 %} 9357 9358 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9359 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9360 match(Set dst (URShiftVS dst shift)); 9361 effect(TEMP src); 9362 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 9363 ins_encode %{ 9364 int vector_len = 1; 9365 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9366 %} 9367 ins_pipe( pipe_slow ); 9368 %} 9369 9370 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9371 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9372 match(Set dst (URShiftVS src shift)); 9373 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9374 ins_encode %{ 9375 int vector_len = 2; 9376 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9377 %} 9378 ins_pipe( pipe_slow ); 9379 %} 9380 9381 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9382 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9383 match(Set dst (URShiftVS src shift)); 9384 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9385 ins_encode %{ 9386 int vector_len = 2; 9387 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9388 %} 9389 ins_pipe( pipe_slow ); 9390 %} 9391 9392 // Integers vector logical right shift 9393 instruct vsrl2I(vecD dst, vecS shift) %{ 9394 predicate(n->as_Vector()->length() == 2); 9395 match(Set dst (URShiftVI dst shift)); 9396 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9397 ins_encode %{ 9398 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9399 %} 9400 ins_pipe( pipe_slow ); 9401 %} 9402 9403 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 9404 predicate(n->as_Vector()->length() == 2); 9405 match(Set dst (URShiftVI dst shift)); 9406 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9407 ins_encode %{ 9408 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9409 %} 9410 ins_pipe( pipe_slow ); 9411 %} 9412 9413 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 9414 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9415 match(Set dst (URShiftVI src shift)); 9416 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9417 ins_encode %{ 9418 int vector_len = 0; 9419 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9420 %} 9421 ins_pipe( pipe_slow ); 9422 %} 9423 9424 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9425 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9426 match(Set dst (URShiftVI src shift)); 9427 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9428 ins_encode %{ 9429 int vector_len = 0; 9430 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9431 %} 9432 ins_pipe( pipe_slow ); 9433 %} 9434 9435 instruct vsrl4I(vecX dst, vecS shift) %{ 9436 predicate(n->as_Vector()->length() == 4); 9437 match(Set dst (URShiftVI dst shift)); 9438 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9439 ins_encode %{ 9440 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9441 %} 9442 ins_pipe( pipe_slow ); 9443 %} 9444 9445 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 9446 predicate(n->as_Vector()->length() == 4); 9447 match(Set dst (URShiftVI dst shift)); 9448 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9449 ins_encode %{ 9450 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9451 %} 9452 ins_pipe( pipe_slow ); 9453 %} 9454 9455 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 9456 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9457 match(Set dst (URShiftVI src shift)); 9458 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed4I" %} 9459 ins_encode %{ 9460 int vector_len = 0; 9461 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9462 %} 9463 ins_pipe( pipe_slow ); 9464 %} 9465 9466 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9467 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9468 match(Set dst (URShiftVI src shift)); 9469 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9470 ins_encode %{ 9471 int vector_len = 0; 9472 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9473 %} 9474 ins_pipe( pipe_slow ); 9475 %} 9476 9477 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 9478 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9479 match(Set dst (URShiftVI src shift)); 9480 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9481 ins_encode %{ 9482 int vector_len = 1; 9483 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9484 %} 9485 ins_pipe( pipe_slow ); 9486 %} 9487 9488 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9489 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9490 match(Set dst (URShiftVI src shift)); 9491 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9492 ins_encode %{ 9493 int vector_len = 1; 9494 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9495 %} 9496 ins_pipe( pipe_slow ); 9497 %} 9498 9499 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9500 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9501 match(Set dst (URShiftVI src shift)); 9502 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9503 ins_encode %{ 9504 int vector_len = 2; 9505 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9506 %} 9507 ins_pipe( pipe_slow ); 9508 %} 9509 9510 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9511 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9512 match(Set dst (URShiftVI src shift)); 9513 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9514 ins_encode %{ 9515 int vector_len = 2; 9516 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9517 %} 9518 ins_pipe( pipe_slow ); 9519 %} 9520 9521 // Longs vector logical right shift 9522 instruct vsrl2L(vecX dst, vecS shift) %{ 9523 predicate(n->as_Vector()->length() == 2); 9524 match(Set dst (URShiftVL dst shift)); 9525 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9526 ins_encode %{ 9527 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 9528 %} 9529 ins_pipe( pipe_slow ); 9530 %} 9531 9532 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 9533 predicate(n->as_Vector()->length() == 2); 9534 match(Set dst (URShiftVL dst shift)); 9535 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9536 ins_encode %{ 9537 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 9538 %} 9539 ins_pipe( pipe_slow ); 9540 %} 9541 9542 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 9543 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9544 match(Set dst (URShiftVL src shift)); 9545 format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed2L" %} 9546 ins_encode %{ 9547 int vector_len = 0; 9548 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9549 %} 9550 ins_pipe( pipe_slow ); 9551 %} 9552 9553 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9554 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9555 match(Set dst (URShiftVL src shift)); 9556 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9557 ins_encode %{ 9558 int vector_len = 0; 9559 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9560 %} 9561 ins_pipe( pipe_slow ); 9562 %} 9563 9564 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 9565 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9566 match(Set dst (URShiftVL src shift)); 9567 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9568 ins_encode %{ 9569 int vector_len = 1; 9570 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9571 %} 9572 ins_pipe( pipe_slow ); 9573 %} 9574 9575 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9576 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9577 match(Set dst (URShiftVL src shift)); 9578 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9579 ins_encode %{ 9580 int vector_len = 1; 9581 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9582 %} 9583 ins_pipe( pipe_slow ); 9584 %} 9585 9586 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9587 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9588 match(Set dst (URShiftVL src shift)); 9589 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9590 ins_encode %{ 9591 int vector_len = 2; 9592 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9593 %} 9594 ins_pipe( pipe_slow ); 9595 %} 9596 9597 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9598 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9599 match(Set dst (URShiftVL src shift)); 9600 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9601 ins_encode %{ 9602 int vector_len = 2; 9603 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9604 %} 9605 ins_pipe( pipe_slow ); 9606 %} 9607 9608 // ------------------- ArithmeticRightShift ----------------------------------- 9609 9610 // Shorts/Chars vector arithmetic right shift 9611 instruct vsra2S(vecS dst, vecS shift) %{ 9612 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9613 match(Set dst (RShiftVS dst shift)); 9614 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 9615 ins_encode %{ 9616 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9617 %} 9618 ins_pipe( pipe_slow ); 9619 %} 9620 9621 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 9622 predicate(n->as_Vector()->length() == 2); 9623 match(Set dst (RShiftVS dst shift)); 9624 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 9625 ins_encode %{ 9626 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9627 %} 9628 ins_pipe( pipe_slow ); 9629 %} 9630 9631 instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 9632 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9633 match(Set dst (RShiftVS src shift)); 9634 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} 9635 ins_encode %{ 9636 int vector_len = 0; 9637 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9638 %} 9639 ins_pipe( pipe_slow ); 9640 %} 9641 9642 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 9643 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9644 match(Set dst (RShiftVS src shift)); 9645 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9646 ins_encode %{ 9647 int vector_len = 0; 9648 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9649 %} 9650 ins_pipe( pipe_slow ); 9651 %} 9652 9653 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 9654 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9655 match(Set dst (RShiftVS dst shift)); 9656 effect(TEMP src); 9657 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9658 ins_encode %{ 9659 int vector_len = 0; 9660 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9661 %} 9662 ins_pipe( pipe_slow ); 9663 %} 9664 9665 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 9666 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9667 match(Set dst (RShiftVS src shift)); 9668 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9669 ins_encode %{ 9670 int vector_len = 0; 9671 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9672 %} 9673 ins_pipe( pipe_slow ); 9674 %} 9675 9676 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9677 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9678 match(Set dst (RShiftVS src shift)); 9679 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9680 ins_encode %{ 9681 int vector_len = 0; 9682 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9683 %} 9684 ins_pipe( pipe_slow ); 9685 %} 9686 9687 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9688 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9689 match(Set dst (RShiftVS dst shift)); 9690 effect(TEMP src); 9691 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9692 ins_encode %{ 9693 int vector_len = 0; 9694 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9695 %} 9696 ins_pipe( pipe_slow ); 9697 %} 9698 9699 instruct vsra4S(vecD dst, vecS shift) %{ 9700 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9701 match(Set dst (RShiftVS dst shift)); 9702 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9703 ins_encode %{ 9704 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9705 %} 9706 ins_pipe( pipe_slow ); 9707 %} 9708 9709 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 9710 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9711 match(Set dst (RShiftVS dst shift)); 9712 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9713 ins_encode %{ 9714 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9715 %} 9716 ins_pipe( pipe_slow ); 9717 %} 9718 9719 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9720 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9721 match(Set dst (RShiftVS src shift)); 9722 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %} 9723 ins_encode %{ 9724 int vector_len = 0; 9725 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9726 %} 9727 ins_pipe( pipe_slow ); 9728 %} 9729 9730 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9731 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9732 match(Set dst (RShiftVS src shift)); 9733 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9734 ins_encode %{ 9735 int vector_len = 0; 9736 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9737 %} 9738 ins_pipe( pipe_slow ); 9739 %} 9740 9741 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9742 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9743 match(Set dst (RShiftVS dst shift)); 9744 effect(TEMP src); 9745 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9746 ins_encode %{ 9747 int vector_len = 0; 9748 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9749 %} 9750 ins_pipe( pipe_slow ); 9751 %} 9752 9753 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9754 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9755 match(Set dst (RShiftVS src shift)); 9756 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9757 ins_encode %{ 9758 int vector_len = 0; 9759 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9760 %} 9761 ins_pipe( pipe_slow ); 9762 %} 9763 9764 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9765 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9766 match(Set dst (RShiftVS src shift)); 9767 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9768 ins_encode %{ 9769 int vector_len = 0; 9770 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9771 %} 9772 ins_pipe( pipe_slow ); 9773 %} 9774 9775 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9776 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9777 match(Set dst (RShiftVS dst shift)); 9778 effect(TEMP src); 9779 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9780 ins_encode %{ 9781 int vector_len = 0; 9782 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9783 %} 9784 ins_pipe( pipe_slow ); 9785 %} 9786 9787 instruct vsra8S(vecX dst, vecS shift) %{ 9788 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9789 match(Set dst (RShiftVS dst shift)); 9790 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 9791 ins_encode %{ 9792 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9793 %} 9794 ins_pipe( pipe_slow ); 9795 %} 9796 9797 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 9798 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9799 match(Set dst (RShiftVS dst shift)); 9800 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 9801 ins_encode %{ 9802 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9803 %} 9804 ins_pipe( pipe_slow ); 9805 %} 9806 9807 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9808 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9809 match(Set dst (RShiftVS src shift)); 9810 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed8S" %} 9811 ins_encode %{ 9812 int vector_len = 0; 9813 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9814 %} 9815 ins_pipe( pipe_slow ); 9816 %} 9817 9818 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9819 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9820 match(Set dst (RShiftVS src shift)); 9821 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9822 ins_encode %{ 9823 int vector_len = 0; 9824 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9825 %} 9826 ins_pipe( pipe_slow ); 9827 %} 9828 9829 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9830 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9831 match(Set dst (RShiftVS dst shift)); 9832 effect(TEMP src); 9833 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9834 ins_encode %{ 9835 int vector_len = 0; 9836 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9837 %} 9838 ins_pipe( pipe_slow ); 9839 %} 9840 9841 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9842 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9843 match(Set dst (RShiftVS src shift)); 9844 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9845 ins_encode %{ 9846 int vector_len = 0; 9847 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9848 %} 9849 ins_pipe( pipe_slow ); 9850 %} 9851 9852 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9853 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9854 match(Set dst (RShiftVS src shift)); 9855 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9856 ins_encode %{ 9857 int vector_len = 0; 9858 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9859 %} 9860 ins_pipe( pipe_slow ); 9861 %} 9862 9863 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9864 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9865 match(Set dst (RShiftVS dst shift)); 9866 effect(TEMP src); 9867 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 9868 ins_encode %{ 9869 int vector_len = 0; 9870 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9871 %} 9872 ins_pipe( pipe_slow ); 9873 %} 9874 9875 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9876 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9877 match(Set dst (RShiftVS src shift)); 9878 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 9879 ins_encode %{ 9880 int vector_len = 1; 9881 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9882 %} 9883 ins_pipe( pipe_slow ); 9884 %} 9885 9886 instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9887 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9888 match(Set dst (RShiftVS src shift)); 9889 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} 9890 ins_encode %{ 9891 int vector_len = 1; 9892 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9893 %} 9894 ins_pipe( pipe_slow ); 9895 %} 9896 9897 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9898 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9899 match(Set dst (RShiftVS dst shift)); 9900 effect(TEMP src); 9901 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 9902 ins_encode %{ 9903 int vector_len = 1; 9904 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9905 %} 9906 ins_pipe( pipe_slow ); 9907 %} 9908 9909 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9910 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 9911 match(Set dst (RShiftVS src shift)); 9912 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 9913 ins_encode %{ 9914 int vector_len = 1; 9915 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9916 %} 9917 ins_pipe( pipe_slow ); 9918 %} 9919 9920 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9921 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9922 match(Set dst (RShiftVS src shift)); 9923 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 9924 ins_encode %{ 9925 int vector_len = 1; 9926 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9927 %} 9928 ins_pipe( pipe_slow ); 9929 %} 9930 9931 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9932 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9933 match(Set dst (RShiftVS dst shift)); 9934 effect(TEMP src); 9935 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 9936 ins_encode %{ 9937 int vector_len = 1; 9938 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9939 %} 9940 ins_pipe( pipe_slow ); 9941 %} 9942 9943 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9944 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9945 match(Set dst (RShiftVS src shift)); 9946 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 9947 ins_encode %{ 9948 int vector_len = 2; 9949 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9950 %} 9951 ins_pipe( pipe_slow ); 9952 %} 9953 9954 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9955 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9956 match(Set dst (RShiftVS src shift)); 9957 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 9958 ins_encode %{ 9959 int vector_len = 2; 9960 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9961 %} 9962 ins_pipe( pipe_slow ); 9963 %} 9964 9965 // Integers vector arithmetic right shift 9966 instruct vsra2I(vecD dst, vecS shift) %{ 9967 predicate(n->as_Vector()->length() == 2); 9968 match(Set dst (RShiftVI dst shift)); 9969 format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed2I" %} 9970 ins_encode %{ 9971 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 9972 %} 9973 ins_pipe( pipe_slow ); 9974 %} 9975 9976 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 9977 predicate(n->as_Vector()->length() == 2); 9978 match(Set dst (RShiftVI dst shift)); 9979 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 9980 ins_encode %{ 9981 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 9982 %} 9983 ins_pipe( pipe_slow ); 9984 %} 9985 9986 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 9987 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9988 match(Set dst (RShiftVI src shift)); 9989 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 9990 ins_encode %{ 9991 int vector_len = 0; 9992 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9993 %} 9994 ins_pipe( pipe_slow ); 9995 %} 9996 9997 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9998 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9999 match(Set dst (RShiftVI src shift)); 10000 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10001 ins_encode %{ 10002 int vector_len = 0; 10003 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10004 %} 10005 ins_pipe( pipe_slow ); 10006 %} 10007 10008 instruct vsra4I(vecX dst, vecS shift) %{ 10009 predicate(n->as_Vector()->length() == 4); 10010 match(Set dst (RShiftVI dst shift)); 10011 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10012 ins_encode %{ 10013 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10014 %} 10015 ins_pipe( pipe_slow ); 10016 %} 10017 10018 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 10019 predicate(n->as_Vector()->length() == 4); 10020 match(Set dst (RShiftVI dst shift)); 10021 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10022 ins_encode %{ 10023 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10024 %} 10025 ins_pipe( pipe_slow ); 10026 %} 10027 10028 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 10029 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10030 match(Set dst (RShiftVI src shift)); 10031 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10032 ins_encode %{ 10033 int vector_len = 0; 10034 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10035 %} 10036 ins_pipe( pipe_slow ); 10037 %} 10038 10039 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 10040 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10041 match(Set dst (RShiftVI src shift)); 10042 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10043 ins_encode %{ 10044 int vector_len = 0; 10045 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10046 %} 10047 ins_pipe( pipe_slow ); 10048 %} 10049 10050 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 10051 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10052 match(Set dst (RShiftVI src shift)); 10053 format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed8I" %} 10054 ins_encode %{ 10055 int vector_len = 1; 10056 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10057 %} 10058 ins_pipe( pipe_slow ); 10059 %} 10060 10061 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 10062 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10063 match(Set dst (RShiftVI src shift)); 10064 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10065 ins_encode %{ 10066 int vector_len = 1; 10067 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10068 %} 10069 ins_pipe( pipe_slow ); 10070 %} 10071 10072 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ 10073 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10074 match(Set dst (RShiftVI src shift)); 10075 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10076 ins_encode %{ 10077 int vector_len = 2; 10078 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10079 %} 10080 ins_pipe( pipe_slow ); 10081 %} 10082 10083 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10084 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10085 match(Set dst (RShiftVI src shift)); 10086 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10087 ins_encode %{ 10088 int vector_len = 2; 10089 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10090 %} 10091 ins_pipe( pipe_slow ); 10092 %} 10093 10094 // There are no longs vector arithmetic right shift instructions. 10095 10096 10097 // --------------------------------- AND -------------------------------------- 10098 10099 instruct vand4B(vecS dst, vecS src) %{ 10100 predicate(n->as_Vector()->length_in_bytes() == 4); 10101 match(Set dst (AndV dst src)); 10102 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 10103 ins_encode %{ 10104 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10105 %} 10106 ins_pipe( pipe_slow ); 10107 %} 10108 10109 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 10110 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10111 match(Set dst (AndV src1 src2)); 10112 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 10113 ins_encode %{ 10114 int vector_len = 0; 10115 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10116 %} 10117 ins_pipe( pipe_slow ); 10118 %} 10119 10120 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 10121 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10122 match(Set dst (AndV src (LoadVector mem))); 10123 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 10124 ins_encode %{ 10125 int vector_len = 0; 10126 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10127 %} 10128 ins_pipe( pipe_slow ); 10129 %} 10130 10131 instruct vand8B(vecD dst, vecD src) %{ 10132 predicate(n->as_Vector()->length_in_bytes() == 8); 10133 match(Set dst (AndV dst src)); 10134 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 10135 ins_encode %{ 10136 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10137 %} 10138 ins_pipe( pipe_slow ); 10139 %} 10140 10141 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 10142 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10143 match(Set dst (AndV src1 src2)); 10144 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (8 bytes)" %} 10145 ins_encode %{ 10146 int vector_len = 0; 10147 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10148 %} 10149 ins_pipe( pipe_slow ); 10150 %} 10151 10152 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 10153 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10154 match(Set dst (AndV src (LoadVector mem))); 10155 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 10156 ins_encode %{ 10157 int vector_len = 0; 10158 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10159 %} 10160 ins_pipe( pipe_slow ); 10161 %} 10162 10163 instruct vand16B(vecX dst, vecX src) %{ 10164 predicate(n->as_Vector()->length_in_bytes() == 16); 10165 match(Set dst (AndV dst src)); 10166 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 10167 ins_encode %{ 10168 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10169 %} 10170 ins_pipe( pipe_slow ); 10171 %} 10172 10173 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 10174 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10175 match(Set dst (AndV src1 src2)); 10176 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 10177 ins_encode %{ 10178 int vector_len = 0; 10179 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10180 %} 10181 ins_pipe( pipe_slow ); 10182 %} 10183 10184 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 10185 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10186 match(Set dst (AndV src (LoadVector mem))); 10187 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 10188 ins_encode %{ 10189 int vector_len = 0; 10190 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10191 %} 10192 ins_pipe( pipe_slow ); 10193 %} 10194 10195 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 10196 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10197 match(Set dst (AndV src1 src2)); 10198 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 10199 ins_encode %{ 10200 int vector_len = 1; 10201 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10202 %} 10203 ins_pipe( pipe_slow ); 10204 %} 10205 10206 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 10207 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10208 match(Set dst (AndV src (LoadVector mem))); 10209 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 10210 ins_encode %{ 10211 int vector_len = 1; 10212 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10213 %} 10214 ins_pipe( pipe_slow ); 10215 %} 10216 10217 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10218 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10219 match(Set dst (AndV src1 src2)); 10220 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} 10221 ins_encode %{ 10222 int vector_len = 2; 10223 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10224 %} 10225 ins_pipe( pipe_slow ); 10226 %} 10227 10228 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ 10229 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10230 match(Set dst (AndV src (LoadVector mem))); 10231 format %{ "vpand $dst,$src,$mem\t! 
and vectors (64 bytes)" %} 10232 ins_encode %{ 10233 int vector_len = 2; 10234 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10235 %} 10236 ins_pipe( pipe_slow ); 10237 %} 10238 10239 // --------------------------------- OR --------------------------------------- 10240 10241 instruct vor4B(vecS dst, vecS src) %{ 10242 predicate(n->as_Vector()->length_in_bytes() == 4); 10243 match(Set dst (OrV dst src)); 10244 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 10245 ins_encode %{ 10246 __ por($dst$$XMMRegister, $src$$XMMRegister); 10247 %} 10248 ins_pipe( pipe_slow ); 10249 %} 10250 10251 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 10252 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10253 match(Set dst (OrV src1 src2)); 10254 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 10255 ins_encode %{ 10256 int vector_len = 0; 10257 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10258 %} 10259 ins_pipe( pipe_slow ); 10260 %} 10261 10262 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 10263 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10264 match(Set dst (OrV src (LoadVector mem))); 10265 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 10266 ins_encode %{ 10267 int vector_len = 0; 10268 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10269 %} 10270 ins_pipe( pipe_slow ); 10271 %} 10272 10273 instruct vor8B(vecD dst, vecD src) %{ 10274 predicate(n->as_Vector()->length_in_bytes() == 8); 10275 match(Set dst (OrV dst src)); 10276 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 10277 ins_encode %{ 10278 __ por($dst$$XMMRegister, $src$$XMMRegister); 10279 %} 10280 ins_pipe( pipe_slow ); 10281 %} 10282 10283 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 10284 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10285 match(Set dst (OrV src1 src2)); 10286 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} 10287 ins_encode %{ 10288 int vector_len = 0; 10289 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10290 %} 10291 ins_pipe( pipe_slow ); 10292 %} 10293 10294 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ 10295 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10296 match(Set dst (OrV src (LoadVector mem))); 10297 format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} 10298 ins_encode %{ 10299 int vector_len = 0; 10300 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10301 %} 10302 ins_pipe( pipe_slow ); 10303 %} 10304 10305 instruct vor16B(vecX dst, vecX src) %{ 10306 predicate(n->as_Vector()->length_in_bytes() == 16); 10307 match(Set dst (OrV dst src)); 10308 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 10309 ins_encode %{ 10310 __ por($dst$$XMMRegister, $src$$XMMRegister); 10311 %} 10312 ins_pipe( pipe_slow ); 10313 %} 10314 10315 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10316 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10317 match(Set dst (OrV src1 src2)); 10318 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (16 bytes)" %} 10319 ins_encode %{ 10320 int vector_len = 0; 10321 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10322 %} 10323 ins_pipe( pipe_slow ); 10324 %} 10325 10326 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 10327 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10328 match(Set dst (OrV src (LoadVector mem))); 10329 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 10330 ins_encode %{ 10331 int vector_len = 0; 10332 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10333 %} 10334 ins_pipe( pipe_slow ); 10335 %} 10336 10337 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 10338 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10339 match(Set dst (OrV src1 src2)); 10340 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 10341 ins_encode %{ 10342 int vector_len = 1; 10343 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10344 %} 10345 ins_pipe( pipe_slow ); 10346 %} 10347 10348 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 10349 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10350 match(Set dst (OrV src (LoadVector mem))); 10351 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 10352 ins_encode %{ 10353 int vector_len = 1; 10354 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10355 %} 10356 ins_pipe( pipe_slow ); 10357 %} 10358 10359 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10360 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10361 match(Set dst (OrV src1 src2)); 10362 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 10363 ins_encode %{ 10364 int vector_len = 2; 10365 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10366 %} 10367 ins_pipe( pipe_slow ); 10368 %} 10369 10370 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 10371 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10372 match(Set dst (OrV src (LoadVector mem))); 10373 format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %} 10374 ins_encode %{ 10375 int vector_len = 2; 10376 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10377 %} 10378 ins_pipe( pipe_slow ); 10379 %} 10380 10381 // --------------------------------- XOR -------------------------------------- 10382 10383 instruct vxor4B(vecS dst, vecS src) %{ 10384 predicate(n->as_Vector()->length_in_bytes() == 4); 10385 match(Set dst (XorV dst src)); 10386 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 10387 ins_encode %{ 10388 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10389 %} 10390 ins_pipe( pipe_slow ); 10391 %} 10392 10393 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 10394 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10395 match(Set dst (XorV src1 src2)); 10396 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 10397 ins_encode %{ 10398 int vector_len = 0; 10399 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10400 %} 10401 ins_pipe( pipe_slow ); 10402 %} 10403 10404 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 10405 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10406 match(Set dst (XorV src (LoadVector mem))); 10407 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (4 bytes)" %} 10408 ins_encode %{ 10409 int vector_len = 0; 10410 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10411 %} 10412 ins_pipe( pipe_slow ); 10413 %} 10414 10415 instruct vxor8B(vecD dst, vecD src) %{ 10416 predicate(n->as_Vector()->length_in_bytes() == 8); 10417 match(Set dst (XorV dst src)); 10418 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 10419 ins_encode %{ 10420 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10421 %} 10422 ins_pipe( pipe_slow ); 10423 %} 10424 10425 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 10426 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10427 match(Set dst (XorV src1 src2)); 10428 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 10429 ins_encode %{ 10430 int vector_len = 0; 10431 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10432 %} 10433 ins_pipe( pipe_slow ); 10434 %} 10435 10436 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 10437 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10438 match(Set dst (XorV src (LoadVector mem))); 10439 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 10440 ins_encode %{ 10441 int vector_len = 0; 10442 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10443 %} 10444 ins_pipe( pipe_slow ); 10445 %} 10446 10447 instruct vxor16B(vecX dst, vecX src) %{ 10448 predicate(n->as_Vector()->length_in_bytes() == 16); 10449 match(Set dst (XorV dst src)); 10450 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 10451 ins_encode %{ 10452 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 10453 %} 10454 ins_pipe( pipe_slow ); 10455 %} 10456 10457 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10458 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10459 match(Set dst (XorV src1 src2)); 10460 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 10461 ins_encode %{ 10462 int vector_len = 0; 10463 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10464 %} 10465 ins_pipe( pipe_slow ); 10466 %} 10467 10468 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 10469 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10470 match(Set dst (XorV src (LoadVector mem))); 10471 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 10472 ins_encode %{ 10473 int vector_len = 0; 10474 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10475 %} 10476 ins_pipe( pipe_slow ); 10477 %} 10478 10479 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 10480 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10481 match(Set dst (XorV src1 src2)); 10482 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 10483 ins_encode %{ 10484 int vector_len = 1; 10485 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10486 %} 10487 ins_pipe( pipe_slow ); 10488 %} 10489 10490 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 10491 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10492 match(Set dst (XorV src (LoadVector mem))); 10493 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (32 bytes)" %} 10494 ins_encode %{ 10495 int vector_len = 1; 10496 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10497 %} 10498 ins_pipe( pipe_slow ); 10499 %} 10500 10501 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10502 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10503 match(Set dst (XorV src1 src2)); 10504 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} 10505 ins_encode %{ 10506 int vector_len = 2; 10507 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10508 %} 10509 ins_pipe( pipe_slow ); 10510 %} 10511 10512 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ 10513 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10514 match(Set dst (XorV src (LoadVector mem))); 10515 format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %} 10516 ins_encode %{ 10517 int vector_len = 2; 10518 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10519 %} 10520 ins_pipe( pipe_slow ); 10521 %} 10522 10523 // --------------------------------- FMA -------------------------------------- 10524 10525 // a * b + c 10526 instruct vfma2D_reg(vecX a, vecX b, vecX c) %{ 10527 predicate(UseFMA && n->as_Vector()->length() == 2); 10528 match(Set c (FmaVD c (Binary a b))); 10529 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 10530 ins_cost(150); 10531 ins_encode %{ 10532 int vector_len = 0; 10533 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10534 %} 10535 ins_pipe( pipe_slow ); 10536 %} 10537 10538 // a * b + c 10539 instruct vfma2D_mem(vecX a, memory b, vecX c) %{ 10540 predicate(UseFMA && n->as_Vector()->length() == 2); 10541 match(Set c (FmaVD c (Binary a (LoadVector b)))); 10542 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 10543 ins_cost(150); 10544 ins_encode %{ 10545 int vector_len = 0; 10546 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 10547 %} 10548 ins_pipe( pipe_slow ); 10549 %} 10550 10551 10552 // a * b + c 10553 instruct vfma4D_reg(vecY a, vecY b, vecY c) %{ 10554 predicate(UseFMA && n->as_Vector()->length() == 4); 10555 match(Set c (FmaVD c (Binary a b))); 10556 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 10557 ins_cost(150); 10558 ins_encode %{ 10559 int vector_len = 1; 10560 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10561 %} 10562 ins_pipe( pipe_slow ); 10563 %} 10564 10565 // a * b + c 10566 instruct vfma4D_mem(vecY a, memory b, vecY c) %{ 10567 predicate(UseFMA && n->as_Vector()->length() == 4); 10568 match(Set c (FmaVD c (Binary a (LoadVector b)))); 10569 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 10570 ins_cost(150); 10571 ins_encode %{ 10572 int vector_len = 1; 10573 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 10574 %} 10575 ins_pipe( pipe_slow ); 10576 %} 10577 10578 // a * b + c 10579 instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{ 10580 predicate(UseFMA && n->as_Vector()->length() == 8); 10581 match(Set c (FmaVD c (Binary a b))); 10582 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 10583 ins_cost(150); 10584 ins_encode %{ 10585 int vector_len = 2; 10586 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 10587 %} 10588 ins_pipe( pipe_slow ); 10589 %} 10590 10591 // a * b + c 10592 instruct vfma8D_mem(vecZ a, memory b, vecZ c) 
// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}