//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters
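//
// As a concrete, illustrative reading of the tuple format (a sketch only;
// the comment block above is the authoritative description), the first
// definition below,
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//
// says: register save type SOC (usable without saving on entry, but must
// be saved at call sites), C convention save type SOC, ideal type Op_RegF
// (a Float-sized slot), encoding 0 (the bit-pattern placed into opcodes),
// and the VM-level register handle.  A Double in XMM0 then occupies the
// word pair XMM0/XMM0b, while the remaining words XMM0c-XMM0p matter only
// for the wider vector register classes defined further down.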
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
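//
// The register classes below come in legacy/EVEX pairs: the legacy variant
// stops at XMM15 (the architectural limit without EVEX encodings), while
// the EVEX variant extends through XMM31.  A reg_class_dynamic then picks
// between the two at runtime, following the pattern (taken from the float
// case below):
//
//   reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy,
//                               %{ VM_Version::supports_evex() %} );
//
// so operands declared with float_reg can only be allocated in XMM16-XMM31
// when the CPU actually supports EVEX.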

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for EVEX 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for EVEX 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for EVEX 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre-EVEX 256bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for EVEX 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push "the_pc" on the stack without destroying any registers,
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_byte_bitset() { return StubRoutines::x86::vector_byte_bitset(); }
static address vector_long_perm_mask() { return StubRoutines::x86::vector_long_perm_mask(); }
static address vector_byte_saturationmask() { return StubRoutines::x86::vector_byte_saturation_mask(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_MulVB:
    case Op_MulVI:
    case Op_MulVL:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
    if (!vector_size_supported(bt, vlen)) {
      ret_value = false;
    } else if (size_in_bits > 256 && UseAVX <= 2) {
      // Only AVX512 supports 512-bit vectors
      ret_value = false;
    } else if (UseAVX == 0 && size_in_bits > 128) {
      // Only AVX supports 256-bit vectors
      ret_value = false;
    } else if (is_subword_type(bt) && size_in_bits == 512 && VM_Version::supports_avx512bw() == false) {
      // Byte and Short types are not supported in AVX512 if AVX512BW is not available.
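      // (Illustrative: a 64-element byte vector is 64 * 1 * 8 == 512 bits wide,
      // so it is rejected here unless AVX512BW is present.)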
      ret_value = false;
    } else {
      switch (opcode) {
        case Op_AddVB:
        case Op_SubVB:
          if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
            ret_value = false;
          break;
        case Op_URShiftVS:
        case Op_RShiftVS:
        case Op_LShiftVS:
        case Op_MulVS:
        case Op_AddVS:
        case Op_SubVS:
          if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
            ret_value = false;
          break;
        case Op_CMoveVF:
          if (vlen != 8)
            ret_value = false;
          break;
        case Op_CMoveVD:
          if (vlen != 4)
            ret_value = false;
          break;
        case Op_VectorMaskCmp:
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorBlend:
          if (UseSSE <= 3 && UseAVX == 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorTest:
          if (UseAVX <= 0) { ret_value = false; }
          else if (size_in_bits != 128 && size_in_bits != 256) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorLoadMask:
          if (UseSSE <= 3) { ret_value = false; }
          else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorStoreMask:
          if (UseAVX < 2) { ret_value = false; } // Implementation limitation
          else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation
          else if (size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; } // Implementation limitation
          break;
        default:
          break;
      }
    }
  }
  if (ret_value) {
    assert(is_java_primitive(bt) && (vlen > 0) && is_power_of_2(vlen) &&
           vector_size_supported(bt, vlen), "must be supported");
  }

  return ret_value;  // By default match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
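  // (Illustrative: UseAVX == 2 gives (1 << 2) * 8 == 32 bytes, UseAVX == 3 gives
  // (1 << 3) * 8 == 64 bytes, and plain SSE2 stays at 16, before the clamp above.)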
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypasses
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
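// For example (illustrative): in an address computed as base + (index << 2) + 16,
// both the shift (scale 4) and the constant offset fold into a single
// [base + index*4 + 16] addressing mode, so cloning them into the address
// expression is cheaper than materializing the intermediate values in registers.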
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM the size calculation is very complex, so there the size is
  // obtained by emitting the instructions into a scratch buffer instead.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM the size calculation is very complex, so there the size is
  // obtained by emitting the instructions into a scratch buffer instead.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
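    // (Illustrative: a zero stack_offset needs no displacement byte, an offset
    // below 0x80 fits in a one-byte disp8, and larger offsets need a disp32.)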
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
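  // For example (illustrative): replicate8_imm(0xAB, 1) yields 0xABABABABABABABAB
  // and replicate8_imm(0x1234, 2) yields 0x1234123412341234.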
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// This operand applies generically only to EVEX, so there is just one version.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  predicate((UseSSE > 0) && (UseAVX == 0));
  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without two next lines with error:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
    overflow     (0x20, "o");  // not really supported by the instruction
    no_overflow  (0x21, "no"); // not really supported by the instruction
  %}
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "ud2\t# ShouldNotReachHere" %}
  ins_encode %{
    __ ud2();
  %}
  ins_pipe(pipe_slow);
%}

// =================================EVEX special===============================

instruct setMask(rRegI dst, rRegI src) %{
  predicate(Matcher::has_predicated_vectors());
  match(Set dst (SetVectMaskI src));
  effect(TEMP dst);
  format %{ "setvectmask $dst, $src" %}
  ins_encode %{
    __ setvectmask($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
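  // (Clearing the IEEE-754 sign bit with the 0x7fffffff mask implements abs();
  // flipping it with 0x80000000, as the neg rules further below do, negates.)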
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"pause\t! membar_onspinwait"
    } else {
      $$emit$$"MEMBAR-onspinwait ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR INSTRUCTIONS=====================================

// FIXME: eliminate VectorReinterpret nodes before matching

instruct reinterpretS(vecS dst) %{
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretD(vecD dst) %{
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretX(vecX dst) %{
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretY(vecY dst) %{
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretZ(vecZ dst) %{
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

// ==========

// Load vectors (1 byte long)
instruct loadV1(vecS dst, memory mem, rRegI tmp) %{
  predicate(n->as_LoadVector()->memory_size() == 1);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  effect(TEMP tmp);
  format %{ "movzbl $tmp,$mem\n\t"
            "movd $dst,$tmp\t! load vector (1 byte)" %}
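  // (There is no 1-byte load into an XMM register, so the byte is zero-extended
  // into a GPR with movzbl and then transferred with movd.)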
  ins_encode %{
    __ movzbl($tmp$$Register, $mem$$Address);
    __ movdl($dst$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (2 bytes long)
instruct loadV2(vecS dst, memory mem, rRegI tmp) %{
  predicate(n->as_LoadVector()->memory_size() == 2);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  effect(TEMP tmp);
  format %{ "movzwl $tmp,$mem\n\t"
            "movd $dst,$tmp\t! load vector (2 bytes)" %}
  ins_encode %{
    __ movzwl($tmp$$Register, $mem$$Address);
    __ movdl($dst$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
instruct storeV1(memory mem, vecS src, rRegI tmp) %{
  predicate(n->as_StoreVector()->memory_size() == 1);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  effect(TEMP tmp);
  format %{ "movd $tmp,$src\n\t"
            "movb $mem,$tmp\t! store vector (1 byte)" %}
// Store vectors
instruct storeV1(memory mem, vecS src, rRegI tmp) %{
  predicate(n->as_StoreVector()->memory_size() == 1);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  effect(TEMP tmp);
  format %{ "movd $tmp,$src\n\t"
            "movb $mem,$tmp\t! store vector (1 byte)" %}
  ins_encode %{
    __ movdl($tmp$$Register, $src$$XMMRegister);
    __ movb($mem$$Address, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV2(memory mem, vecS src, rRegI tmp) %{
  predicate(n->as_StoreVector()->memory_size() == 2);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  effect(TEMP tmp);
  format %{ "movd $tmp,$src\n\t"
            "movw $mem,$tmp\t! store vector (2 bytes)" %}
  ins_encode %{
    __ movdl($tmp$$Register, $src$$XMMRegister);
    __ movw($mem$$Address, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE=======================================
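// The rules in this section cover pre-AVX-512 code paths; their predicates
// deliberately exclude avx512vl/avx512vlbw CPUs, which are handled by the
// EVEX REPLICATE rules further below.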
instruct Repl4B_mem(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
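// The *_imm forms below materialize a replicated immediate through the
// constant table: replicate8_imm() pre-packs the value into an 8-byte
// constant, so one movq plus register shuffles yields the full vector.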
instruct Repl16B_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl16S(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64
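// For longs the scalar constant is already 8 bytes wide, so the imm forms
// below load $constantaddress($con) directly rather than packing it with
// replicate8_imm() as the byte/short/int variants do.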
instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================
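// The generic rules below test only the vector length, with no CPU-feature
// predicate, so they apply on any SSE2-capable target that reaches them.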
// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}
instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}
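// pxor dst,dst is the usual dependency-breaking idiom for zeroing an XMM
// register, which is why the zero-replicate rules below never read their
// $zero operand.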
// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct Repl2L_imm(vecX dst, immL con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// ====================EVEX REPLICATE=============================================
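// In the EVEX rules below vector_len follows the Assembler AVX_*bit
// encoding (0 = 128-bit, 1 = 256-bit, 2 = 512-bit). Byte/short broadcasts
// require avx512bw at 512 bits, plus avx512vl for the 128/256-bit forms.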
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastb $dst,$dst\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
instruct Repl4S_evex(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4S_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8S_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS src));
  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl8S_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate8S" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate16S" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastw $dst,$dst\t! replicate32S" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl8I_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI src));
  format %{ "vpbroadcastd $dst,$src\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm_evex(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate4I" %}
  ins_encode %{
    int vector_len = 0;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm_evex(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate8I" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_imm_evex(vecZ dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t"
            "vpbroadcastd $dst,$dst\t! replicate16I" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
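// On 32-bit (#else) targets the long arrives as an eRegL pair, so the
// rules below first assemble the lo/hi halves in an XMM temp (movdl +
// punpckldq) before broadcasting it.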
// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L_evex(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  format %{ "vpbroadcastq $dst,$src\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm_evex(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
replicate8D" %} 4518 ins_encode %{ 4519 int vector_len = 2; 4520 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4521 %} 4522 ins_pipe( pipe_slow ); 4523 %} 4524 4525 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4526 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4527 match(Set dst (ReplicateD zero)); 4528 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4529 ins_encode %{ 4530 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4531 int vector_len = 2; 4532 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4533 %} 4534 ins_pipe( fpu_reg_reg ); 4535 %} 4536 4537 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4538 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4539 match(Set dst (ReplicateD zero)); 4540 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4541 ins_encode %{ 4542 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4543 int vector_len = 2; 4544 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4545 %} 4546 ins_pipe( fpu_reg_reg ); 4547 %} 4548 4549 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4550 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4551 match(Set dst (ReplicateD zero)); 4552 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4553 ins_encode %{ 4554 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4555 int vector_len = 2; 4556 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4557 %} 4558 ins_pipe( fpu_reg_reg ); 4559 %} 4560 4561 // ====================REDUCTION ARITHMETIC======================================= 4562 4563 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4564 predicate(UseSSE > 2 && UseAVX == 0); 4565 match(Set dst (AddReductionVI src1 src2)); 4566 effect(TEMP tmp2, TEMP tmp); 4567 format %{ "movdqu $tmp2,$src2\n\t" 4568 "phaddd $tmp2,$tmp2\n\t" 4569 "movd $tmp,$src1\n\t" 4570 "paddd $tmp,$tmp2\n\t" 4571 "movd $dst,$tmp\t! add reduction2I" %} 4572 ins_encode %{ 4573 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4574 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4575 __ movdl($tmp$$XMMRegister, $src1$$Register); 4576 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4577 __ movdl($dst$$Register, $tmp$$XMMRegister); 4578 %} 4579 ins_pipe( pipe_slow ); 4580 %} 4581 4582 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4583 predicate(VM_Version::supports_avxonly()); 4584 match(Set dst (AddReductionVI src1 src2)); 4585 effect(TEMP tmp, TEMP tmp2); 4586 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4587 "movd $tmp2,$src1\n\t" 4588 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4589 "movd $dst,$tmp2\t! 
add reduction2I" %} 4590 ins_encode %{ 4591 int vector_len = 0; 4592 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4593 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4594 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4595 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4596 %} 4597 ins_pipe( pipe_slow ); 4598 %} 4599 4600 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4601 predicate(UseAVX > 2); 4602 match(Set dst (AddReductionVI src1 src2)); 4603 effect(TEMP tmp, TEMP tmp2); 4604 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4605 "vpaddd $tmp,$src2,$tmp2\n\t" 4606 "movd $tmp2,$src1\n\t" 4607 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4608 "movd $dst,$tmp2\t! add reduction2I" %} 4609 ins_encode %{ 4610 int vector_len = 0; 4611 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4612 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4613 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4614 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4615 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4616 %} 4617 ins_pipe( pipe_slow ); 4618 %} 4619 4620 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4621 predicate(UseSSE > 2 && UseAVX == 0); 4622 match(Set dst (AddReductionVI src1 src2)); 4623 effect(TEMP tmp, TEMP tmp2); 4624 format %{ "movdqu $tmp,$src2\n\t" 4625 "phaddd $tmp,$tmp\n\t" 4626 "phaddd $tmp,$tmp\n\t" 4627 "movd $tmp2,$src1\n\t" 4628 "paddd $tmp2,$tmp\n\t" 4629 "movd $dst,$tmp2\t! add reduction4I" %} 4630 ins_encode %{ 4631 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4632 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4633 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4634 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4635 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4636 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4637 %} 4638 ins_pipe( pipe_slow ); 4639 %} 4640 4641 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4642 predicate(VM_Version::supports_avxonly()); 4643 match(Set dst (AddReductionVI src1 src2)); 4644 effect(TEMP tmp, TEMP tmp2); 4645 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4646 "vphaddd $tmp,$tmp,$tmp\n\t" 4647 "movd $tmp2,$src1\n\t" 4648 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4649 "movd $dst,$tmp2\t! add reduction4I" %} 4650 ins_encode %{ 4651 int vector_len = 0; 4652 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4653 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4654 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4655 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4656 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4657 %} 4658 ins_pipe( pipe_slow ); 4659 %} 4660 4661 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4662 predicate(UseAVX > 2); 4663 match(Set dst (AddReductionVI src1 src2)); 4664 effect(TEMP tmp, TEMP tmp2); 4665 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4666 "vpaddd $tmp,$src2,$tmp2\n\t" 4667 "pshufd $tmp2,$tmp,0x1\n\t" 4668 "vpaddd $tmp,$tmp,$tmp2\n\t" 4669 "movd $tmp2,$src1\n\t" 4670 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4671 "movd $dst,$tmp2\t! 
add reduction4I" %} 4672 ins_encode %{ 4673 int vector_len = 0; 4674 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4675 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4676 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4677 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4678 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4679 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4680 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4681 %} 4682 ins_pipe( pipe_slow ); 4683 %} 4684 4685 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4686 predicate(VM_Version::supports_avxonly()); 4687 match(Set dst (AddReductionVI src1 src2)); 4688 effect(TEMP tmp, TEMP tmp2); 4689 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4690 "vphaddd $tmp,$tmp,$tmp2\n\t" 4691 "vextracti128_high $tmp2,$tmp\n\t" 4692 "vpaddd $tmp,$tmp,$tmp2\n\t" 4693 "movd $tmp2,$src1\n\t" 4694 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4695 "movd $dst,$tmp2\t! add reduction8I" %} 4696 ins_encode %{ 4697 int vector_len = 1; 4698 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4699 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4700 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4701 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4702 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4703 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4704 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4705 %} 4706 ins_pipe( pipe_slow ); 4707 %} 4708 4709 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4710 predicate(UseAVX > 2); 4711 match(Set dst (AddReductionVI src1 src2)); 4712 effect(TEMP tmp, TEMP tmp2); 4713 format %{ "vextracti128_high $tmp,$src2\n\t" 4714 "vpaddd $tmp,$tmp,$src2\n\t" 4715 "pshufd $tmp2,$tmp,0xE\n\t" 4716 "vpaddd $tmp,$tmp,$tmp2\n\t" 4717 "pshufd $tmp2,$tmp,0x1\n\t" 4718 "vpaddd $tmp,$tmp,$tmp2\n\t" 4719 "movd $tmp2,$src1\n\t" 4720 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4721 "movd $dst,$tmp2\t! add reduction8I" %} 4722 ins_encode %{ 4723 int vector_len = 0; 4724 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4725 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4726 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4727 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4728 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4729 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4730 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4731 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4732 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4733 %} 4734 ins_pipe( pipe_slow ); 4735 %} 4736 4737 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4738 predicate(UseAVX > 2); 4739 match(Set dst (AddReductionVI src1 src2)); 4740 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4741 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4742 "vpaddd $tmp3,$tmp3,$src2\n\t" 4743 "vextracti128_high $tmp,$tmp3\n\t" 4744 "vpaddd $tmp,$tmp,$tmp3\n\t" 4745 "pshufd $tmp2,$tmp,0xE\n\t" 4746 "vpaddd $tmp,$tmp,$tmp2\n\t" 4747 "pshufd $tmp2,$tmp,0x1\n\t" 4748 "vpaddd $tmp,$tmp,$tmp2\n\t" 4749 "movd $tmp2,$src1\n\t" 4750 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4751 "movd $dst,$tmp2\t! 
mul reduction16I" %} 4752 ins_encode %{ 4753 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 4754 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 4755 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 4756 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 4757 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4758 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4759 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4760 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4761 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4762 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4763 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4764 %} 4765 ins_pipe( pipe_slow ); 4766 %} 4767 4768 #ifdef _LP64 4769 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 4770 predicate(UseAVX > 2); 4771 match(Set dst (AddReductionVL src1 src2)); 4772 effect(TEMP tmp, TEMP tmp2); 4773 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4774 "vpaddq $tmp,$src2,$tmp2\n\t" 4775 "movdq $tmp2,$src1\n\t" 4776 "vpaddq $tmp2,$tmp,$tmp2\n\t" 4777 "movdq $dst,$tmp2\t! add reduction2L" %} 4778 ins_encode %{ 4779 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4780 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 4781 __ movdq($tmp2$$XMMRegister, $src1$$Register); 4782 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4783 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4784 %} 4785 ins_pipe( pipe_slow ); 4786 %} 4787 4788 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 4789 predicate(UseAVX > 2); 4790 match(Set dst (AddReductionVL src1 src2)); 4791 effect(TEMP tmp, TEMP tmp2); 4792 format %{ "vextracti128_high $tmp,$src2\n\t" 4793 "vpaddq $tmp2,$tmp,$src2\n\t" 4794 "pshufd $tmp,$tmp2,0xE\n\t" 4795 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4796 "movdq $tmp,$src1\n\t" 4797 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4798 "movdq $dst,$tmp2\t! add reduction4L" %} 4799 ins_encode %{ 4800 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4801 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 4802 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4803 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4804 __ movdq($tmp$$XMMRegister, $src1$$Register); 4805 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4806 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4807 %} 4808 ins_pipe( pipe_slow ); 4809 %} 4810 4811 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 4812 predicate(UseAVX > 2); 4813 match(Set dst (AddReductionVL src1 src2)); 4814 effect(TEMP tmp, TEMP tmp2); 4815 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 4816 "vpaddq $tmp2,$tmp2,$src2\n\t" 4817 "vextracti128_high $tmp,$tmp2\n\t" 4818 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4819 "pshufd $tmp,$tmp2,0xE\n\t" 4820 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4821 "movdq $tmp,$src1\n\t" 4822 "vpaddq $tmp2,$tmp2,$tmp\n\t" 4823 "movdq $dst,$tmp2\t! 
add reduction8L" %} 4824 ins_encode %{ 4825 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4826 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4827 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 4828 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4829 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4830 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4831 __ movdq($tmp$$XMMRegister, $src1$$Register); 4832 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4833 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4834 %} 4835 ins_pipe( pipe_slow ); 4836 %} 4837 #endif 4838 4839 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4840 predicate(UseSSE >= 1 && UseAVX == 0); 4841 match(Set dst (AddReductionVF dst src2)); 4842 effect(TEMP dst, TEMP tmp); 4843 format %{ "addss $dst,$src2\n\t" 4844 "pshufd $tmp,$src2,0x01\n\t" 4845 "addss $dst,$tmp\t! add reduction2F" %} 4846 ins_encode %{ 4847 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4848 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4849 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4850 %} 4851 ins_pipe( pipe_slow ); 4852 %} 4853 4854 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4855 predicate(UseAVX > 0); 4856 match(Set dst (AddReductionVF dst src2)); 4857 effect(TEMP dst, TEMP tmp); 4858 format %{ "vaddss $dst,$dst,$src2\n\t" 4859 "pshufd $tmp,$src2,0x01\n\t" 4860 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 4861 ins_encode %{ 4862 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4863 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4864 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4865 %} 4866 ins_pipe( pipe_slow ); 4867 %} 4868 4869 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4870 predicate(UseSSE >= 1 && UseAVX == 0); 4871 match(Set dst (AddReductionVF dst src2)); 4872 effect(TEMP dst, TEMP tmp); 4873 format %{ "addss $dst,$src2\n\t" 4874 "pshufd $tmp,$src2,0x01\n\t" 4875 "addss $dst,$tmp\n\t" 4876 "pshufd $tmp,$src2,0x02\n\t" 4877 "addss $dst,$tmp\n\t" 4878 "pshufd $tmp,$src2,0x03\n\t" 4879 "addss $dst,$tmp\t! add reduction4F" %} 4880 ins_encode %{ 4881 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4882 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4883 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4884 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4885 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4886 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4887 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4888 %} 4889 ins_pipe( pipe_slow ); 4890 %} 4891 4892 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4893 predicate(UseAVX > 0); 4894 match(Set dst (AddReductionVF dst src2)); 4895 effect(TEMP tmp, TEMP dst); 4896 format %{ "vaddss $dst,dst,$src2\n\t" 4897 "pshufd $tmp,$src2,0x01\n\t" 4898 "vaddss $dst,$dst,$tmp\n\t" 4899 "pshufd $tmp,$src2,0x02\n\t" 4900 "vaddss $dst,$dst,$tmp\n\t" 4901 "pshufd $tmp,$src2,0x03\n\t" 4902 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 4903 ins_encode %{ 4904 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4905 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4906 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4907 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4908 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4909 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4910 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4911 %} 4912 ins_pipe( pipe_slow ); 4913 %} 4914 4915 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 4916 predicate(UseAVX > 0); 4917 match(Set dst (AddReductionVF dst src2)); 4918 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4919 format %{ "vaddss $dst,$dst,$src2\n\t" 4920 "pshufd $tmp,$src2,0x01\n\t" 4921 "vaddss $dst,$dst,$tmp\n\t" 4922 "pshufd $tmp,$src2,0x02\n\t" 4923 "vaddss $dst,$dst,$tmp\n\t" 4924 "pshufd $tmp,$src2,0x03\n\t" 4925 "vaddss $dst,$dst,$tmp\n\t" 4926 "vextractf128_high $tmp2,$src2\n\t" 4927 "vaddss $dst,$dst,$tmp2\n\t" 4928 "pshufd $tmp,$tmp2,0x01\n\t" 4929 "vaddss $dst,$dst,$tmp\n\t" 4930 "pshufd $tmp,$tmp2,0x02\n\t" 4931 "vaddss $dst,$dst,$tmp\n\t" 4932 "pshufd $tmp,$tmp2,0x03\n\t" 4933 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 4934 ins_encode %{ 4935 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4936 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4937 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4938 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4939 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4940 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4941 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4942 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4943 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4944 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4945 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4946 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4947 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4948 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4949 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4950 %} 4951 ins_pipe( pipe_slow ); 4952 %} 4953 4954 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 4955 predicate(UseAVX > 2); 4956 match(Set dst (AddReductionVF dst src2)); 4957 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4958 format %{ "vaddss $dst,$dst,$src2\n\t" 4959 "pshufd $tmp,$src2,0x01\n\t" 4960 "vaddss $dst,$dst,$tmp\n\t" 4961 "pshufd $tmp,$src2,0x02\n\t" 4962 "vaddss $dst,$dst,$tmp\n\t" 4963 "pshufd $tmp,$src2,0x03\n\t" 4964 "vaddss $dst,$dst,$tmp\n\t" 4965 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4966 "vaddss $dst,$dst,$tmp2\n\t" 4967 "pshufd $tmp,$tmp2,0x01\n\t" 4968 "vaddss $dst,$dst,$tmp\n\t" 4969 "pshufd $tmp,$tmp2,0x02\n\t" 4970 "vaddss $dst,$dst,$tmp\n\t" 4971 "pshufd $tmp,$tmp2,0x03\n\t" 4972 "vaddss $dst,$dst,$tmp\n\t" 4973 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4974 "vaddss $dst,$dst,$tmp2\n\t" 4975 "pshufd $tmp,$tmp2,0x01\n\t" 4976 "vaddss $dst,$dst,$tmp\n\t" 4977 "pshufd $tmp,$tmp2,0x02\n\t" 4978 "vaddss $dst,$dst,$tmp\n\t" 4979 "pshufd $tmp,$tmp2,0x03\n\t" 4980 "vaddss $dst,$dst,$tmp\n\t" 4981 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4982 "vaddss $dst,$dst,$tmp2\n\t" 4983 "pshufd $tmp,$tmp2,0x01\n\t" 4984 "vaddss $dst,$dst,$tmp\n\t" 4985 "pshufd 
$tmp,$tmp2,0x02\n\t" 4986 "vaddss $dst,$dst,$tmp\n\t" 4987 "pshufd $tmp,$tmp2,0x03\n\t" 4988 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 4989 ins_encode %{ 4990 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4991 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4992 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4993 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4994 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4995 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4996 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4997 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4998 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4999 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5000 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5001 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5002 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5003 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5004 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5005 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5006 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5007 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5008 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5009 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5010 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5011 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5012 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5013 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5014 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5015 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5016 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5017 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5018 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5019 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5020 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5021 %} 5022 ins_pipe( pipe_slow ); 5023 %} 5024 5025 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5026 predicate(UseSSE >= 1 && UseAVX == 0); 5027 match(Set dst (AddReductionVD dst src2)); 5028 effect(TEMP tmp, TEMP dst); 5029 format %{ "addsd $dst,$src2\n\t" 5030 "pshufd $tmp,$src2,0xE\n\t" 5031 "addsd $dst,$tmp\t! add reduction2D" %} 5032 ins_encode %{ 5033 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5034 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5035 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5036 %} 5037 ins_pipe( pipe_slow ); 5038 %} 5039 5040 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5041 predicate(UseAVX > 0); 5042 match(Set dst (AddReductionVD dst src2)); 5043 effect(TEMP tmp, TEMP dst); 5044 format %{ "vaddsd $dst,$dst,$src2\n\t" 5045 "pshufd $tmp,$src2,0xE\n\t" 5046 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5047 ins_encode %{ 5048 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5049 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5050 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5051 %} 5052 ins_pipe( pipe_slow ); 5053 %} 5054 5055 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5056 predicate(UseAVX > 0); 5057 match(Set dst (AddReductionVD dst src2)); 5058 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5059 format %{ "vaddsd $dst,$dst,$src2\n\t" 5060 "pshufd $tmp,$src2,0xE\n\t" 5061 "vaddsd $dst,$dst,$tmp\n\t" 5062 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5063 "vaddsd $dst,$dst,$tmp2\n\t" 5064 "pshufd $tmp,$tmp2,0xE\n\t" 5065 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5066 ins_encode %{ 5067 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5068 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5069 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5070 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5071 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5072 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5073 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5074 %} 5075 ins_pipe( pipe_slow ); 5076 %} 5077 5078 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5079 predicate(UseAVX > 2); 5080 match(Set dst (AddReductionVD dst src2)); 5081 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5082 format %{ "vaddsd $dst,$dst,$src2\n\t" 5083 "pshufd $tmp,$src2,0xE\n\t" 5084 "vaddsd $dst,$dst,$tmp\n\t" 5085 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5086 "vaddsd $dst,$dst,$tmp2\n\t" 5087 "pshufd $tmp,$tmp2,0xE\n\t" 5088 "vaddsd $dst,$dst,$tmp\n\t" 5089 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5090 "vaddsd $dst,$dst,$tmp2\n\t" 5091 "pshufd $tmp,$tmp2,0xE\n\t" 5092 "vaddsd $dst,$dst,$tmp\n\t" 5093 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5094 "vaddsd $dst,$dst,$tmp2\n\t" 5095 "pshufd $tmp,$tmp2,0xE\n\t" 5096 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5097 ins_encode %{ 5098 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5099 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5100 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5101 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5102 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5103 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5104 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5105 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5106 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5107 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5108 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5109 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5110 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5111 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5112 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5113 %} 5114 ins_pipe( pipe_slow ); 5115 %} 5116 5117 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5118 predicate(UseSSE > 3 && UseAVX == 0); 5119 match(Set dst (MulReductionVI src1 src2)); 5120 effect(TEMP tmp, TEMP tmp2); 5121 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5122 "pmulld $tmp2,$src2\n\t" 5123 "movd $tmp,$src1\n\t" 5124 "pmulld $tmp2,$tmp\n\t" 5125 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5126 ins_encode %{ 5127 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5128 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5129 __ movdl($tmp$$XMMRegister, $src1$$Register); 5130 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5131 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5132 %} 5133 ins_pipe( pipe_slow ); 5134 %} 5135 5136 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5137 predicate(UseAVX > 0); 5138 match(Set dst (MulReductionVI src1 src2)); 5139 effect(TEMP tmp, TEMP tmp2); 5140 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5141 "vpmulld $tmp,$src2,$tmp2\n\t" 5142 "movd $tmp2,$src1\n\t" 5143 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5144 "movd $dst,$tmp2\t! mul reduction2I" %} 5145 ins_encode %{ 5146 int vector_len = 0; 5147 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5148 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5149 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5150 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5151 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5152 %} 5153 ins_pipe( pipe_slow ); 5154 %} 5155 5156 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5157 predicate(UseSSE > 3 && UseAVX == 0); 5158 match(Set dst (MulReductionVI src1 src2)); 5159 effect(TEMP tmp, TEMP tmp2); 5160 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5161 "pmulld $tmp2,$src2\n\t" 5162 "pshufd $tmp,$tmp2,0x1\n\t" 5163 "pmulld $tmp2,$tmp\n\t" 5164 "movd $tmp,$src1\n\t" 5165 "pmulld $tmp2,$tmp\n\t" 5166 "movd $dst,$tmp2\t! mul reduction4I" %} 5167 ins_encode %{ 5168 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5169 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5170 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5171 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5172 __ movdl($tmp$$XMMRegister, $src1$$Register); 5173 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5174 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5175 %} 5176 ins_pipe( pipe_slow ); 5177 %} 5178 5179 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5180 predicate(UseAVX > 0); 5181 match(Set dst (MulReductionVI src1 src2)); 5182 effect(TEMP tmp, TEMP tmp2); 5183 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5184 "vpmulld $tmp,$src2,$tmp2\n\t" 5185 "pshufd $tmp2,$tmp,0x1\n\t" 5186 "vpmulld $tmp,$tmp,$tmp2\n\t" 5187 "movd $tmp2,$src1\n\t" 5188 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5189 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5190 ins_encode %{ 5191 int vector_len = 0; 5192 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5193 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5194 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5195 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5196 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5197 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5198 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5199 %} 5200 ins_pipe( pipe_slow ); 5201 %} 5202 5203 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5204 predicate(UseAVX > 0); 5205 match(Set dst (MulReductionVI src1 src2)); 5206 effect(TEMP tmp, TEMP tmp2); 5207 format %{ "vextracti128_high $tmp,$src2\n\t" 5208 "vpmulld $tmp,$tmp,$src2\n\t" 5209 "pshufd $tmp2,$tmp,0xE\n\t" 5210 "vpmulld $tmp,$tmp,$tmp2\n\t" 5211 "pshufd $tmp2,$tmp,0x1\n\t" 5212 "vpmulld $tmp,$tmp,$tmp2\n\t" 5213 "movd $tmp2,$src1\n\t" 5214 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5215 "movd $dst,$tmp2\t! mul reduction8I" %} 5216 ins_encode %{ 5217 int vector_len = 0; 5218 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5219 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5220 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5221 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5222 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5223 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5224 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5225 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5226 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5227 %} 5228 ins_pipe( pipe_slow ); 5229 %} 5230 5231 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5232 predicate(UseAVX > 2); 5233 match(Set dst (MulReductionVI src1 src2)); 5234 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5235 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5236 "vpmulld $tmp3,$tmp3,$src2\n\t" 5237 "vextracti128_high $tmp,$tmp3\n\t" 5238 "vpmulld $tmp,$tmp,$src2\n\t" 5239 "pshufd $tmp2,$tmp,0xE\n\t" 5240 "vpmulld $tmp,$tmp,$tmp2\n\t" 5241 "pshufd $tmp2,$tmp,0x1\n\t" 5242 "vpmulld $tmp,$tmp,$tmp2\n\t" 5243 "movd $tmp2,$src1\n\t" 5244 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5245 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5246 ins_encode %{ 5247 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5248 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5249 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5250 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5251 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5252 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5253 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5254 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5255 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5256 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5257 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5258 %} 5259 ins_pipe( pipe_slow ); 5260 %} 5261 5262 #ifdef _LP64 5263 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5264 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5265 match(Set dst (MulReductionVL src1 src2)); 5266 effect(TEMP tmp, TEMP tmp2); 5267 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5268 "vpmullq $tmp,$src2,$tmp2\n\t" 5269 "movdq $tmp2,$src1\n\t" 5270 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5271 "movdq $dst,$tmp2\t! mul reduction2L" %} 5272 ins_encode %{ 5273 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5274 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5275 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5276 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5277 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5278 %} 5279 ins_pipe( pipe_slow ); 5280 %} 5281 5282 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5283 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5284 match(Set dst (MulReductionVL src1 src2)); 5285 effect(TEMP tmp, TEMP tmp2); 5286 format %{ "vextracti128_high $tmp,$src2\n\t" 5287 "vpmullq $tmp2,$tmp,$src2\n\t" 5288 "pshufd $tmp,$tmp2,0xE\n\t" 5289 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5290 "movdq $tmp,$src1\n\t" 5291 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5292 "movdq $dst,$tmp2\t! mul reduction4L" %} 5293 ins_encode %{ 5294 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5295 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5296 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5297 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5298 __ movdq($tmp$$XMMRegister, $src1$$Register); 5299 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5300 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5301 %} 5302 ins_pipe( pipe_slow ); 5303 %} 5304 5305 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5306 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5307 match(Set dst (MulReductionVL src1 src2)); 5308 effect(TEMP tmp, TEMP tmp2); 5309 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5310 "vpmullq $tmp2,$tmp2,$src2\n\t" 5311 "vextracti128_high $tmp,$tmp2\n\t" 5312 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5313 "pshufd $tmp,$tmp2,0xE\n\t" 5314 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5315 "movdq $tmp,$src1\n\t" 5316 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5317 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5318 ins_encode %{ 5319 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5320 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5321 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5322 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5323 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5324 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5325 __ movdq($tmp$$XMMRegister, $src1$$Register); 5326 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5327 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5328 %} 5329 ins_pipe( pipe_slow ); 5330 %} 5331 #endif 5332 5333 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5334 predicate(UseSSE >= 1 && UseAVX == 0); 5335 match(Set dst (MulReductionVF dst src2)); 5336 effect(TEMP dst, TEMP tmp); 5337 format %{ "mulss $dst,$src2\n\t" 5338 "pshufd $tmp,$src2,0x01\n\t" 5339 "mulss $dst,$tmp\t! mul reduction2F" %} 5340 ins_encode %{ 5341 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5342 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5343 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5344 %} 5345 ins_pipe( pipe_slow ); 5346 %} 5347 5348 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5349 predicate(UseAVX > 0); 5350 match(Set dst (MulReductionVF dst src2)); 5351 effect(TEMP tmp, TEMP dst); 5352 format %{ "vmulss $dst,$dst,$src2\n\t" 5353 "pshufd $tmp,$src2,0x01\n\t" 5354 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5355 ins_encode %{ 5356 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5357 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5358 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5359 %} 5360 ins_pipe( pipe_slow ); 5361 %} 5362 5363 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5364 predicate(UseSSE >= 1 && UseAVX == 0); 5365 match(Set dst (MulReductionVF dst src2)); 5366 effect(TEMP dst, TEMP tmp); 5367 format %{ "mulss $dst,$src2\n\t" 5368 "pshufd $tmp,$src2,0x01\n\t" 5369 "mulss $dst,$tmp\n\t" 5370 "pshufd $tmp,$src2,0x02\n\t" 5371 "mulss $dst,$tmp\n\t" 5372 "pshufd $tmp,$src2,0x03\n\t" 5373 "mulss $dst,$tmp\t! mul reduction4F" %} 5374 ins_encode %{ 5375 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5376 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5377 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5378 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5379 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5380 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5381 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5382 %} 5383 ins_pipe( pipe_slow ); 5384 %} 5385 5386 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5387 predicate(UseAVX > 0); 5388 match(Set dst (MulReductionVF dst src2)); 5389 effect(TEMP tmp, TEMP dst); 5390 format %{ "vmulss $dst,$dst,$src2\n\t" 5391 "pshufd $tmp,$src2,0x01\n\t" 5392 "vmulss $dst,$dst,$tmp\n\t" 5393 "pshufd $tmp,$src2,0x02\n\t" 5394 "vmulss $dst,$dst,$tmp\n\t" 5395 "pshufd $tmp,$src2,0x03\n\t" 5396 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5397 ins_encode %{ 5398 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5399 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5400 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5401 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5402 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5403 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5404 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5405 %} 5406 ins_pipe( pipe_slow ); 5407 %} 5408 5409 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5410 predicate(UseAVX > 0); 5411 match(Set dst (MulReductionVF dst src2)); 5412 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5413 format %{ "vmulss $dst,$dst,$src2\n\t" 5414 "pshufd $tmp,$src2,0x01\n\t" 5415 "vmulss $dst,$dst,$tmp\n\t" 5416 "pshufd $tmp,$src2,0x02\n\t" 5417 "vmulss $dst,$dst,$tmp\n\t" 5418 "pshufd $tmp,$src2,0x03\n\t" 5419 "vmulss $dst,$dst,$tmp\n\t" 5420 "vextractf128_high $tmp2,$src2\n\t" 5421 "vmulss $dst,$dst,$tmp2\n\t" 5422 "pshufd $tmp,$tmp2,0x01\n\t" 5423 "vmulss $dst,$dst,$tmp\n\t" 5424 "pshufd $tmp,$tmp2,0x02\n\t" 5425 "vmulss $dst,$dst,$tmp\n\t" 5426 "pshufd $tmp,$tmp2,0x03\n\t" 5427 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5428 ins_encode %{ 5429 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5430 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5431 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5432 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5433 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5434 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5435 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5436 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5437 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5438 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5439 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5440 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5441 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5442 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5443 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5444 %} 5445 ins_pipe( pipe_slow ); 5446 %} 5447 5448 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5449 predicate(UseAVX > 2); 5450 match(Set dst (MulReductionVF dst src2)); 5451 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5452 format %{ "vmulss $dst,$dst,$src2\n\t" 5453 "pshufd $tmp,$src2,0x01\n\t" 5454 "vmulss $dst,$dst,$tmp\n\t" 5455 "pshufd $tmp,$src2,0x02\n\t" 5456 "vmulss $dst,$dst,$tmp\n\t" 5457 "pshufd $tmp,$src2,0x03\n\t" 5458 "vmulss $dst,$dst,$tmp\n\t" 5459 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5460 "vmulss $dst,$dst,$tmp2\n\t" 5461 "pshufd $tmp,$tmp2,0x01\n\t" 5462 "vmulss $dst,$dst,$tmp\n\t" 5463 "pshufd $tmp,$tmp2,0x02\n\t" 5464 "vmulss $dst,$dst,$tmp\n\t" 5465 "pshufd $tmp,$tmp2,0x03\n\t" 5466 "vmulss $dst,$dst,$tmp\n\t" 5467 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5468 "vmulss $dst,$dst,$tmp2\n\t" 5469 "pshufd $tmp,$tmp2,0x01\n\t" 5470 "vmulss $dst,$dst,$tmp\n\t" 5471 "pshufd $tmp,$tmp2,0x02\n\t" 5472 "vmulss $dst,$dst,$tmp\n\t" 5473 "pshufd $tmp,$tmp2,0x03\n\t" 5474 "vmulss $dst,$dst,$tmp\n\t" 5475 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5476 "vmulss $dst,$dst,$tmp2\n\t" 5477 "pshufd $tmp,$tmp2,0x01\n\t" 5478 "vmulss $dst,$dst,$tmp\n\t" 5479 "pshufd 
$tmp,$tmp2,0x02\n\t" 5480 "vmulss $dst,$dst,$tmp\n\t" 5481 "pshufd $tmp,$tmp2,0x03\n\t" 5482 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 5483 ins_encode %{ 5484 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5485 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5486 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5487 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5488 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5489 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5490 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5491 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5492 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5493 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5494 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5495 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5496 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5497 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5498 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5499 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5500 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5501 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5502 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5503 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5504 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5505 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5506 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5507 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5508 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5509 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5510 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5511 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5512 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5513 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5514 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5515 %} 5516 ins_pipe( pipe_slow ); 5517 %} 5518 5519 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5520 predicate(UseSSE >= 1 && UseAVX == 0); 5521 match(Set dst (MulReductionVD dst src2)); 5522 effect(TEMP dst, TEMP tmp); 5523 format %{ "mulsd $dst,$src2\n\t" 5524 "pshufd $tmp,$src2,0xE\n\t" 5525 "mulsd $dst,$tmp\t! mul reduction2D" %} 5526 ins_encode %{ 5527 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 5528 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5529 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 5530 %} 5531 ins_pipe( pipe_slow ); 5532 %} 5533 5534 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5535 predicate(UseAVX > 0); 5536 match(Set dst (MulReductionVD dst src2)); 5537 effect(TEMP tmp, TEMP dst); 5538 format %{ "vmulsd $dst,$dst,$src2\n\t" 5539 "pshufd $tmp,$src2,0xE\n\t" 5540 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %} 5541 ins_encode %{ 5542 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5543 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5544 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5545 %} 5546 ins_pipe( pipe_slow ); 5547 %} 5548 5549 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5550 predicate(UseAVX > 0); 5551 match(Set dst (MulReductionVD dst src2)); 5552 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5553 format %{ "vmulsd $dst,$dst,$src2\n\t" 5554 "pshufd $tmp,$src2,0xE\n\t" 5555 "vmulsd $dst,$dst,$tmp\n\t" 5556 "vextractf128_high $tmp2,$src2\n\t" 5557 "vmulsd $dst,$dst,$tmp2\n\t" 5558 "pshufd $tmp,$tmp2,0xE\n\t" 5559 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5560 ins_encode %{ 5561 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5562 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5563 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5564 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5565 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5566 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5567 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5568 %} 5569 ins_pipe( pipe_slow ); 5570 %} 5571 5572 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5573 predicate(UseAVX > 2); 5574 match(Set dst (MulReductionVD dst src2)); 5575 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5576 format %{ "vmulsd $dst,$dst,$src2\n\t" 5577 "pshufd $tmp,$src2,0xE\n\t" 5578 "vmulsd $dst,$dst,$tmp\n\t" 5579 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5580 "vmulsd $dst,$dst,$tmp2\n\t" 5581 "pshufd $tmp,$src2,0xE\n\t" 5582 "vmulsd $dst,$dst,$tmp\n\t" 5583 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5584 "vmulsd $dst,$dst,$tmp2\n\t" 5585 "pshufd $tmp,$tmp2,0xE\n\t" 5586 "vmulsd $dst,$dst,$tmp\n\t" 5587 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5588 "vmulsd $dst,$dst,$tmp2\n\t" 5589 "pshufd $tmp,$tmp2,0xE\n\t" 5590 "vmulsd $dst,$dst,$tmp\t! 
mul reduction8D" %} 5591 ins_encode %{ 5592 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5593 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5594 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5595 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5596 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5597 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5598 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5599 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5600 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5601 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5602 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5603 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5604 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5605 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5606 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5607 %} 5608 ins_pipe( pipe_slow ); 5609 %} 5610 5611 instruct rsand2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5612 predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT); 5613 match(Set dst (AndReductionV src1 src2)); 5614 effect(TEMP tmp, TEMP tmp2); 5615 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5616 "pand $tmp2,$src2\n\t" 5617 "movd $tmp,$src1\n\t" 5618 "pand $tmp2,$tmp\n\t" 5619 "movd $dst,$tmp2\t! and reduction2I" %} 5620 ins_encode %{ 5621 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5622 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 5623 __ movdl($tmp$$XMMRegister, $src1$$Register); 5624 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 5625 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5626 %} 5627 ins_pipe( pipe_slow ); 5628 %} 5629 5630 instruct rsand4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5631 predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT); 5632 match(Set dst (AndReductionV src1 src2)); 5633 effect(TEMP tmp, TEMP tmp2); 5634 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5635 "pand $tmp2,$src2\n\t" 5636 "pshufd $tmp,$tmp2,0x1\n\t" 5637 "pand $tmp2,$tmp\n\t" 5638 "movd $tmp,$src1\n\t" 5639 "pand $tmp2,$tmp\n\t" 5640 "movd $dst,$tmp2\t! and reduction4I" %} 5641 ins_encode %{ 5642 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5643 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 5644 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5645 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 5646 __ movdl($tmp$$XMMRegister, $src1$$Register); 5647 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 5648 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5649 %} 5650 ins_pipe( pipe_slow ); 5651 %} 5652 5653 instruct rvand8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5654 predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_INT); 5655 match(Set dst (AndReductionV src1 src2)); 5656 effect(TEMP tmp, TEMP tmp2); 5657 format %{ "vextracti128_high $tmp,$src2\n\t" 5658 "vpand $tmp,$tmp,$src2\n\t" 5659 "vpshufd $tmp2,$tmp,0xE\n\t" 5660 "vpand $tmp,$tmp,$tmp2\n\t" 5661 "vpshufd $tmp2,$tmp,0x1\n\t" 5662 "vpand $tmp,$tmp,$tmp2\n\t" 5663 "movd $tmp2,$src1\n\t" 5664 "vpand $tmp2,$tmp,$tmp2\n\t" 5665 "movd $dst,$tmp2\t! 
and reduction8I" %} 5666 ins_encode %{ 5667 int vector_len = 0; 5668 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5669 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5670 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 5671 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5672 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 5673 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5674 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5675 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5676 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5677 %} 5678 ins_pipe( pipe_slow ); 5679 %} 5680 5681 instruct rvand16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5682 predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_INT); 5683 match(Set dst (AndReductionV src1 src2)); 5684 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5685 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5686 "vpand $tmp3,$tmp3,$src2\n\t" 5687 "vextracti128_high $tmp,$tmp3\n\t" 5688 "vpand $tmp,$tmp,$src2\n\t" 5689 "vpshufd $tmp2,$tmp,0xE\n\t" 5690 "vpand $tmp,$tmp,$tmp2\n\t" 5691 "vpshufd $tmp2,$tmp,0x1\n\t" 5692 "vpand $tmp,$tmp,$tmp2\n\t" 5693 "movd $tmp2,$src1\n\t" 5694 "vpand $tmp2,$tmp,$tmp2\n\t" 5695 "movd $dst,$tmp2\t! and reduction16I" %} 5696 ins_encode %{ 5697 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5698 __ vpand($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5699 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5700 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5701 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, 0); 5702 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5703 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, 0); 5704 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5705 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5706 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5707 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5708 %} 5709 ins_pipe( pipe_slow ); 5710 %} 5711 5712 instruct rsand2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5713 predicate(UseSSE >= 2 && n->in(1)->bottom_type()->basic_type() == T_LONG); 5714 match(Set dst (AndReductionV src1 src2)); 5715 effect(TEMP tmp, TEMP tmp2); 5716 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5717 "pand $tmp2,$src2\n\t" 5718 "movdq $tmp,$src1\n\t" 5719 "pand $tmp2,$tmp\n\t" 5720 "movq $dst,$tmp2\t! and reduction2L" %} 5721 ins_encode %{ 5722 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5723 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 5724 __ movdq($tmp$$XMMRegister, $src1$$Register); 5725 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 5726 __ movq($dst$$Register, $tmp2$$XMMRegister); 5727 %} 5728 ins_pipe( pipe_slow ); 5729 %} 5730 5731 instruct rvand4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5732 predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_LONG); 5733 match(Set dst (AndReductionV src1 src2)); 5734 effect(TEMP tmp, TEMP tmp2); 5735 format %{ "vextracti128_high $tmp,$src2\n\t" 5736 "vpand $tmp2,$tmp,$src2\n\t" 5737 "vpshufd $tmp,$tmp2,0xE\n\t" 5738 "vpand $tmp2,$tmp2,$tmp\n\t" 5739 "movq $tmp,$src1\n\t" 5740 "vpand $tmp2,$tmp2,$tmp\n\t" 5741 "movq $dst,$tmp2\t! 
and reduction4L" %} 5742 ins_encode %{ 5743 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5744 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5745 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); 5746 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5747 __ movq($tmp$$XMMRegister, $src1$$Register); 5748 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5749 __ movq($dst$$Register, $tmp2$$XMMRegister); 5750 %} 5751 ins_pipe( pipe_slow ); 5752 %} 5753 5754 #ifdef _LP64 5755 instruct rvand8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5756 predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_LONG); 5757 match(Set dst (AndReductionV src1 src2)); 5758 effect(TEMP tmp, TEMP tmp2); 5759 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5760 "vpandq $tmp2,$tmp2,$src2\n\t" 5761 "vextracti128_high $tmp,$tmp2\n\t" 5762 "vpandq $tmp2,$tmp2,$tmp\n\t" 5763 "vpshufd $tmp,$tmp2,0xE\n\t" 5764 "vpandq $tmp2,$tmp2,$tmp\n\t" 5765 "movdq $tmp,$src1\n\t" 5766 "vpandq $tmp2,$tmp2,$tmp\n\t" 5767 "movdq $dst,$tmp2\t! and reduction8L" %} 5768 ins_encode %{ 5769 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5770 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5771 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5772 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5773 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); 5774 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5775 __ movdq($tmp$$XMMRegister, $src1$$Register); 5776 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5777 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5778 %} 5779 ins_pipe( pipe_slow ); 5780 %} 5781 #endif 5782 5783 // ====================VECTOR ARITHMETIC======================================= 5784 5785 // --------------------------------- ADD -------------------------------------- 5786 5787 // Bytes vector add 5788 instruct vadd4B(vecS dst, vecS src) %{ 5789 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5790 match(Set dst (AddVB dst src)); 5791 format %{ "paddb $dst,$src\t! add packed4B" %} 5792 ins_encode %{ 5793 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5794 %} 5795 ins_pipe( pipe_slow ); 5796 %} 5797 5798 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5799 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5800 match(Set dst (AddVB src1 src2)); 5801 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5802 ins_encode %{ 5803 int vector_len = 0; 5804 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5805 %} 5806 ins_pipe( pipe_slow ); 5807 %} 5808 5809 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5810 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5811 match(Set dst (AddVB src1 src2)); 5812 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5813 ins_encode %{ 5814 int vector_len = 0; 5815 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5816 %} 5817 ins_pipe( pipe_slow ); 5818 %} 5819 5820 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5821 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5822 match(Set dst (AddVB dst src2)); 5823 effect(TEMP src1); 5824 format %{ "vpaddb $dst,$dst,$src2\t! 
add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
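
// Each packed-byte add comes in three flavors, selected by predicate:
// supports_avxonly() picks the VEX-encoded three-operand form on AVX/AVX2
// hardware, supports_avx512bw() picks the EVEX form when the AVX-512
// byte/word extension is present, and the "_special" rules cover
// supports_avx512nobw() targets by matching the two-operand tree shape
// (Set dst (AddVB dst src2)) with a TEMP operand instead. Whichever rule
// is selected, a single instruction is emitted; for the 4-byte case above
// it is, with illustrative registers:
//   vpaddb xmm0,xmm1,xmm2   ; 128-bit operation, vector_len == 0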
instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
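
// The vector_len constant in the encodings above selects the operation
// width the macro assembler encodes: 0 = 128-bit (vecS/vecD/vecX operands),
// 1 = 256-bit (vecY) and 2 = 512-bit (vecZ). The wider rules further down
// differ from the 128-bit ones only in this constant, e.g.:
//   int vector_len = 1;  // 256-bit ymm operation
//   __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);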
instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
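
// The 32-byte rules below keep the same three-way predicate split, with
// supports_avx256only() standing in for supports_avxonly() on the AVX2
// side. The full 64-byte (512-bit) adds exist only under
// supports_avx512bw(), since the EVEX byte/word instruction forms are
// absent on avx512nobw hardware.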
instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
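
// Every size also has *_mem forms matching (LoadVector mem) directly, which
// fold the load into the add as a memory operand rather than emitting a
// separate vector load:
//   __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);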
// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
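
// As with the small byte vectors, vecS/vecD short vectors occupy a full XMM
// register even though only the low 32 or 64 bits carry vector data;
// paddw/vpaddw operate on all 128 bits and the contents of the unused upper
// lanes are simply ignored.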
instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! 
add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! 
add packed2I" %} 6452 ins_encode %{ 6453 int vector_len = 0; 6454 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6455 %} 6456 ins_pipe( pipe_slow ); 6457 %} 6458 6459 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 6460 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6461 match(Set dst (AddVI src (LoadVector mem))); 6462 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 6463 ins_encode %{ 6464 int vector_len = 0; 6465 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6466 %} 6467 ins_pipe( pipe_slow ); 6468 %} 6469 6470 instruct vadd4I(vecX dst, vecX src) %{ 6471 predicate(n->as_Vector()->length() == 4); 6472 match(Set dst (AddVI dst src)); 6473 format %{ "paddd $dst,$src\t! add packed4I" %} 6474 ins_encode %{ 6475 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 6476 %} 6477 ins_pipe( pipe_slow ); 6478 %} 6479 6480 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 6481 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6482 match(Set dst (AddVI src1 src2)); 6483 format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %} 6484 ins_encode %{ 6485 int vector_len = 0; 6486 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6487 %} 6488 ins_pipe( pipe_slow ); 6489 %} 6490 6491 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 6492 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6493 match(Set dst (AddVI src (LoadVector mem))); 6494 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 6495 ins_encode %{ 6496 int vector_len = 0; 6497 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6498 %} 6499 ins_pipe( pipe_slow ); 6500 %} 6501 6502 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 6503 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6504 match(Set dst (AddVI src1 src2)); 6505 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 6506 ins_encode %{ 6507 int vector_len = 1; 6508 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6509 %} 6510 ins_pipe( pipe_slow ); 6511 %} 6512 6513 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 6514 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 6515 match(Set dst (AddVI src (LoadVector mem))); 6516 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 6517 ins_encode %{ 6518 int vector_len = 1; 6519 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6520 %} 6521 ins_pipe( pipe_slow ); 6522 %} 6523 6524 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6525 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6526 match(Set dst (AddVI src1 src2)); 6527 format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %} 6528 ins_encode %{ 6529 int vector_len = 2; 6530 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6531 %} 6532 ins_pipe( pipe_slow ); 6533 %} 6534 6535 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 6536 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6537 match(Set dst (AddVI src (LoadVector mem))); 6538 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 6539 ins_encode %{ 6540 int vector_len = 2; 6541 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6542 %} 6543 ins_pipe( pipe_slow ); 6544 %} 6545 6546 // Longs vector add 6547 instruct vadd2L(vecX dst, vecX src) %{ 6548 predicate(n->as_Vector()->length() == 2); 6549 match(Set dst (AddVL dst src)); 6550 format %{ "paddq $dst,$src\t! 
add packed2L" %} 6551 ins_encode %{ 6552 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 6553 %} 6554 ins_pipe( pipe_slow ); 6555 %} 6556 6557 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 6558 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6559 match(Set dst (AddVL src1 src2)); 6560 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 6561 ins_encode %{ 6562 int vector_len = 0; 6563 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6564 %} 6565 ins_pipe( pipe_slow ); 6566 %} 6567 6568 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 6569 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6570 match(Set dst (AddVL src (LoadVector mem))); 6571 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 6572 ins_encode %{ 6573 int vector_len = 0; 6574 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6575 %} 6576 ins_pipe( pipe_slow ); 6577 %} 6578 6579 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 6580 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6581 match(Set dst (AddVL src1 src2)); 6582 format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %} 6583 ins_encode %{ 6584 int vector_len = 1; 6585 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6586 %} 6587 ins_pipe( pipe_slow ); 6588 %} 6589 6590 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 6591 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 6592 match(Set dst (AddVL src (LoadVector mem))); 6593 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 6594 ins_encode %{ 6595 int vector_len = 1; 6596 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6597 %} 6598 ins_pipe( pipe_slow ); 6599 %} 6600 6601 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6602 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6603 match(Set dst (AddVL src1 src2)); 6604 format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} 6605 ins_encode %{ 6606 int vector_len = 2; 6607 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6608 %} 6609 ins_pipe( pipe_slow ); 6610 %} 6611 6612 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ 6613 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6614 match(Set dst (AddVL src (LoadVector mem))); 6615 format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} 6616 ins_encode %{ 6617 int vector_len = 2; 6618 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6619 %} 6620 ins_pipe( pipe_slow ); 6621 %} 6622 6623 // Floats vector add 6624 instruct vadd2F(vecD dst, vecD src) %{ 6625 predicate(n->as_Vector()->length() == 2); 6626 match(Set dst (AddVF dst src)); 6627 format %{ "addps $dst,$src\t! add packed2F" %} 6628 ins_encode %{ 6629 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6630 %} 6631 ins_pipe( pipe_slow ); 6632 %} 6633 6634 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 6635 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6636 match(Set dst (AddVF src1 src2)); 6637 format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %} 6638 ins_encode %{ 6639 int vector_len = 0; 6640 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6641 %} 6642 ins_pipe( pipe_slow ); 6643 %} 6644 6645 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 6646 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6647 match(Set dst (AddVF src (LoadVector mem))); 6648 format %{ "vaddps $dst,$src,$mem\t! 
add packed2F" %} 6649 ins_encode %{ 6650 int vector_len = 0; 6651 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6652 %} 6653 ins_pipe( pipe_slow ); 6654 %} 6655 6656 instruct vadd4F(vecX dst, vecX src) %{ 6657 predicate(n->as_Vector()->length() == 4); 6658 match(Set dst (AddVF dst src)); 6659 format %{ "addps $dst,$src\t! add packed4F" %} 6660 ins_encode %{ 6661 __ addps($dst$$XMMRegister, $src$$XMMRegister); 6662 %} 6663 ins_pipe( pipe_slow ); 6664 %} 6665 6666 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 6667 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6668 match(Set dst (AddVF src1 src2)); 6669 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 6670 ins_encode %{ 6671 int vector_len = 0; 6672 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6673 %} 6674 ins_pipe( pipe_slow ); 6675 %} 6676 6677 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 6678 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6679 match(Set dst (AddVF src (LoadVector mem))); 6680 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 6681 ins_encode %{ 6682 int vector_len = 0; 6683 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6684 %} 6685 ins_pipe( pipe_slow ); 6686 %} 6687 6688 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 6689 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6690 match(Set dst (AddVF src1 src2)); 6691 format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %} 6692 ins_encode %{ 6693 int vector_len = 1; 6694 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6695 %} 6696 ins_pipe( pipe_slow ); 6697 %} 6698 6699 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 6700 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 6701 match(Set dst (AddVF src (LoadVector mem))); 6702 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 6703 ins_encode %{ 6704 int vector_len = 1; 6705 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6706 %} 6707 ins_pipe( pipe_slow ); 6708 %} 6709 6710 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6711 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6712 match(Set dst (AddVF src1 src2)); 6713 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} 6714 ins_encode %{ 6715 int vector_len = 2; 6716 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6717 %} 6718 ins_pipe( pipe_slow ); 6719 %} 6720 6721 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 6722 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 6723 match(Set dst (AddVF src (LoadVector mem))); 6724 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 6725 ins_encode %{ 6726 int vector_len = 2; 6727 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6728 %} 6729 ins_pipe( pipe_slow ); 6730 %} 6731 6732 // Doubles vector add 6733 instruct vadd2D(vecX dst, vecX src) %{ 6734 predicate(n->as_Vector()->length() == 2); 6735 match(Set dst (AddVD dst src)); 6736 format %{ "addpd $dst,$src\t! add packed2D" %} 6737 ins_encode %{ 6738 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 6739 %} 6740 ins_pipe( pipe_slow ); 6741 %} 6742 6743 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 6744 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6745 match(Set dst (AddVD src1 src2)); 6746 format %{ "vaddpd $dst,$src1,$src2\t! 
add packed2D" %} 6747 ins_encode %{ 6748 int vector_len = 0; 6749 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6750 %} 6751 ins_pipe( pipe_slow ); 6752 %} 6753 6754 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 6755 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 6756 match(Set dst (AddVD src (LoadVector mem))); 6757 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 6758 ins_encode %{ 6759 int vector_len = 0; 6760 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6761 %} 6762 ins_pipe( pipe_slow ); 6763 %} 6764 6765 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 6766 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6767 match(Set dst (AddVD src1 src2)); 6768 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 6769 ins_encode %{ 6770 int vector_len = 1; 6771 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6772 %} 6773 ins_pipe( pipe_slow ); 6774 %} 6775 6776 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 6777 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 6778 match(Set dst (AddVD src (LoadVector mem))); 6779 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 6780 ins_encode %{ 6781 int vector_len = 1; 6782 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6783 %} 6784 ins_pipe( pipe_slow ); 6785 %} 6786 6787 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 6788 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6789 match(Set dst (AddVD src1 src2)); 6790 format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %} 6791 ins_encode %{ 6792 int vector_len = 2; 6793 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6794 %} 6795 ins_pipe( pipe_slow ); 6796 %} 6797 6798 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 6799 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 6800 match(Set dst (AddVD src (LoadVector mem))); 6801 format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} 6802 ins_encode %{ 6803 int vector_len = 2; 6804 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6805 %} 6806 ins_pipe( pipe_slow ); 6807 %} 6808 6809 // --------------------------------- SUB -------------------------------------- 6810 6811 // Bytes vector sub 6812 instruct vsub4B(vecS dst, vecS src) %{ 6813 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 6814 match(Set dst (SubVB dst src)); 6815 format %{ "psubb $dst,$src\t! sub packed4B" %} 6816 ins_encode %{ 6817 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6818 %} 6819 ins_pipe( pipe_slow ); 6820 %} 6821 6822 instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 6823 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 6824 match(Set dst (SubVB src1 src2)); 6825 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 6826 ins_encode %{ 6827 int vector_len = 0; 6828 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6829 %} 6830 ins_pipe( pipe_slow ); 6831 %} 6832 6833 instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 6834 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 6835 match(Set dst (SubVB src1 src2)); 6836 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed8B" %} 6927 ins_encode %{ 6928 int vector_len = 0; 6929 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6930 %} 6931 ins_pipe( pipe_slow ); 6932 %} 6933 6934 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ 6935 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 6936 match(Set dst (SubVB src (LoadVector mem))); 6937 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6938 ins_encode %{ 6939 int vector_len = 0; 6940 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6941 %} 6942 ins_pipe( pipe_slow ); 6943 %} 6944 6945 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ 6946 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 6947 match(Set dst (SubVB src (LoadVector mem))); 6948 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6949 ins_encode %{ 6950 int vector_len = 0; 6951 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6952 %} 6953 ins_pipe( pipe_slow ); 6954 %} 6955 6956 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 6957 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 6958 match(Set dst (SubVB dst (LoadVector mem))); 6959 effect(TEMP src); 6960 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 6961 ins_encode %{ 6962 int vector_len = 0; 6963 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 6964 %} 6965 ins_pipe( pipe_slow ); 6966 %} 6967 6968 instruct vsub16B(vecX dst, vecX src) %{ 6969 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 6970 match(Set dst (SubVB dst src)); 6971 format %{ "psubb $dst,$src\t! sub packed16B" %} 6972 ins_encode %{ 6973 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 6974 %} 6975 ins_pipe( pipe_slow ); 6976 %} 6977 6978 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 6979 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 6980 match(Set dst (SubVB src1 src2)); 6981 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6982 ins_encode %{ 6983 int vector_len = 0; 6984 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6985 %} 6986 ins_pipe( pipe_slow ); 6987 %} 6988 6989 instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 6990 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 6991 match(Set dst (SubVB src1 src2)); 6992 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 6993 ins_encode %{ 6994 int vector_len = 0; 6995 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 6996 %} 6997 ins_pipe( pipe_slow ); 6998 %} 6999 7000 instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 7001 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7002 match(Set dst (SubVB dst src2)); 7003 effect(TEMP src1); 7004 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 7005 ins_encode %{ 7006 int vector_len = 0; 7007 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7008 %} 7009 ins_pipe( pipe_slow ); 7010 %} 7011 7012 instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ 7013 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 7014 match(Set dst (SubVB src (LoadVector mem))); 7015 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed16B" %} 7016 ins_encode %{ 7017 int vector_len = 0; 7018 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7019 %} 7020 ins_pipe( pipe_slow ); 7021 %} 7022 7023 instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ 7024 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 7025 match(Set dst (SubVB src (LoadVector mem))); 7026 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 7027 ins_encode %{ 7028 int vector_len = 0; 7029 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7030 %} 7031 ins_pipe( pipe_slow ); 7032 %} 7033 7034 instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 7035 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 7036 match(Set dst (SubVB dst (LoadVector mem))); 7037 effect(TEMP src); 7038 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 7039 ins_encode %{ 7040 int vector_len = 0; 7041 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7042 %} 7043 ins_pipe( pipe_slow ); 7044 %} 7045 7046 instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 7047 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 7048 match(Set dst (SubVB src1 src2)); 7049 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 7050 ins_encode %{ 7051 int vector_len = 1; 7052 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7053 %} 7054 ins_pipe( pipe_slow ); 7055 %} 7056 7057 instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 7058 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7059 match(Set dst (SubVB src1 src2)); 7060 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 7061 ins_encode %{ 7062 int vector_len = 1; 7063 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7064 %} 7065 ins_pipe( pipe_slow ); 7066 %} 7067 7068 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 7069 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 7070 match(Set dst (SubVB dst src2)); 7071 effect(TEMP src1); 7072 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 7073 ins_encode %{ 7074 int vector_len = 1; 7075 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7076 %} 7077 ins_pipe( pipe_slow ); 7078 %} 7079 7080 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ 7081 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 7082 match(Set dst (SubVB src (LoadVector mem))); 7083 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7084 ins_encode %{ 7085 int vector_len = 1; 7086 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7087 %} 7088 ins_pipe( pipe_slow ); 7089 %} 7090 7091 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ 7092 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 7093 match(Set dst (SubVB src (LoadVector mem))); 7094 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 7095 ins_encode %{ 7096 int vector_len = 1; 7097 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7098 %} 7099 ins_pipe( pipe_slow ); 7100 %} 7101 7102 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 7103 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 7104 match(Set dst (SubVB dst (LoadVector mem))); 7105 effect(TEMP src); 7106 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed32B" %} 7107 ins_encode %{ 7108 int vector_len = 1; 7109 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7110 %} 7111 ins_pipe( pipe_slow ); 7112 %} 7113 7114 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 7115 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 7116 match(Set dst (SubVB src1 src2)); 7117 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 7118 ins_encode %{ 7119 int vector_len = 2; 7120 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7121 %} 7122 ins_pipe( pipe_slow ); 7123 %} 7124 7125 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 7126 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 7127 match(Set dst (SubVB src (LoadVector mem))); 7128 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 7129 ins_encode %{ 7130 int vector_len = 2; 7131 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7132 %} 7133 ins_pipe( pipe_slow ); 7134 %} 7135 7136 // Shorts/Chars vector sub 7137 instruct vsub2S(vecS dst, vecS src) %{ 7138 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 7139 match(Set dst (SubVS dst src)); 7140 format %{ "psubw $dst,$src\t! sub packed2S" %} 7141 ins_encode %{ 7142 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 7143 %} 7144 ins_pipe( pipe_slow ); 7145 %} 7146 7147 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 7148 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7149 match(Set dst (SubVS src1 src2)); 7150 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7151 ins_encode %{ 7152 int vector_len = 0; 7153 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7154 %} 7155 ins_pipe( pipe_slow ); 7156 %} 7157 7158 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 7159 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7160 match(Set dst (SubVS src1 src2)); 7161 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7162 ins_encode %{ 7163 int vector_len = 0; 7164 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7165 %} 7166 ins_pipe( pipe_slow ); 7167 %} 7168 7169 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 7170 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 7171 match(Set dst (SubVS dst src2)); 7172 effect(TEMP src1); 7173 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 7174 ins_encode %{ 7175 int vector_len = 0; 7176 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 7177 %} 7178 ins_pipe( pipe_slow ); 7179 %} 7180 7181 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 7182 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 7183 match(Set dst (SubVS src (LoadVector mem))); 7184 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 7185 ins_encode %{ 7186 int vector_len = 0; 7187 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 7188 %} 7189 ins_pipe( pipe_slow ); 7190 %} 7191 7192 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 7193 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 7194 match(Set dst (SubVS src (LoadVector mem))); 7195 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Byte vector mul

instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp2, vecS tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{ "pmovsxbw $tmp,$src1\n\t"
            "pmovsxbw $tmp2,$src2\n\t"
            "pmullw $tmp,$tmp2\n\t"
            "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t"
            "pand $tmp,$tmp2\n\t"
            "packuswb $tmp,$tmp\n\t"
            "movss $dst,$tmp\t! mul packed4B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_byte_saturationmask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp2, vecD tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{ "pmovsxbw $tmp,$src1\n\t"
            "pmovsxbw $tmp2,$src2\n\t"
            "pmullw $tmp,$tmp2\n\t"
            "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t"
            "pand $tmp,$tmp2\n\t"
            "packuswb $tmp,$tmp\n\t"
            "movsd $dst,$tmp\t! mul packed8B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_byte_saturationmask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp3, vecX tmp2, vecX tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP tmp3, TEMP tmp2, TEMP tmp);
  format %{ "pmovsxbw $tmp,$src1\n\t"
            "pmovsxbw $tmp2,$src2\n\t"
            "pmullw $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src1\n\t"
            "pshufd $tmp3,$src2\n\t"
            "pmovsxbw $tmp2,$tmp2\n\t"
            "pmovsxbw $tmp3,$tmp3\n\t"
            "pmullw $tmp2,$tmp3\n\t"
            "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t"
            "pand $tmp,$tmp3\n\t"
            "pand $tmp2,$tmp3\n\t"
            "packuswb $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! mul packed16B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 238);
    __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 238);
    __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ pmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister);
    __ pmullw($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_byte_saturationmask()));
    __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecY tmp2, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{ "vpmovsxbw $tmp,$src1\n\t"
            "vpmovsxbw $tmp2,$src2\n\t"
            "vpmullw $tmp,$tmp2\n\t"
            "vmovdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t"
            "vpand $tmp,$tmp2\n\t"
            "vextracti128_high $tmp2,$tmp\n\t"
            "vpackuswb $dst,$tmp,$tmp2\t! mul packed16B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vmovdqu($tmp2$$XMMRegister, ExternalAddress(vector_byte_saturationmask()));
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, vecY tmp3) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti128_high $tmp1,$src1\n\t"
            "vextracti128_high $tmp3,$src2\n\t"
            "vpmovsxbw $tmp1,$tmp1\n\t"
            "vpmovsxbw $tmp3,$tmp3\n\t"
            "vpmullw $tmp1,$tmp1,$tmp3\n\t"
            "vpmovsxbw $tmp2,$src1\n\t"
            "vpmovsxbw $tmp3,$src2\n\t"
            "vpmullw $tmp2,$tmp2,$tmp3\n\t"
            "vmovdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t"
            "vpbroadcastd $tmp3,$tmp3\n\t"
            "vpand $tmp1,$tmp1,$tmp3\n\t"
            "vpand $tmp2,$tmp2,$tmp3\n\t"
            "vpackuswb $dst,$tmp2,$tmp1\n\t"
            "vpermq $dst,$dst,0xD8\t! mul packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ vextracti128_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_byte_saturationmask()));
    __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
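
// The byte-multiply rules above have no packed byte multiply instruction to
// lean on, so they widen each byte to a word (pmovsxbw), multiply with
// pmullw, mask every word down to its low byte (the 0x00ff pattern loaded
// via vector_byte_saturationmask()) and pack the words back to bytes with
// packuswb. A scalar sketch of the per-lane semantics (illustrative only,
// not part of the generated matcher; byte_mul_model is our own name):
//
//   #include <stdint.h>
//   static int8_t byte_mul_model(int8_t a, int8_t b) {
//     int16_t wide = (int16_t)a * (int16_t)b; // pmovsxbw + pmullw
//     return (int8_t)(wide & 0x00ff);         // pand 0x00ff, then packuswb
//   }
//
// Masking before packuswb keeps every word in 0..255, so the unsigned
// saturating pack cannot clip the value and the result is plain Java
// byte (mod 256) arithmetic.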

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long vector mul

instruct mul2L_reg(vecX dst, vecX src2, vecX tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && VM_Version::supports_sse4_1());
  match(Set dst (MulVL dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "pshufd $tmp,$src2,177\n\t"
            "pmulld $tmp,$dst\n\t"
            "phaddd $tmp,$tmp\n\t"
            "pmovzxdq $tmp,$tmp\n\t"
            "psllq $tmp,32\n\t"
            "pmuludq $dst,$src2\n\t"
            "paddq $dst,$tmp\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177);
    __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ psllq($tmp$$XMMRegister, 32);
    __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && VM_Version::supports_avx());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp, vecY tmp1) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && VM_Version::supports_avx2());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
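
// Without AVX-512DQ there is no packed 64x64->64 multiply, so mul2L_reg and
// the vmul2L/vmul4L _reg_avx rules above assemble one from 32-bit halves:
// pmuludq supplies lo(a)*lo(b) in full, while pshufd/pmulld/phaddd collect
// the two cross products, which psllq shifts into the high half before the
// final paddq. A scalar sketch of the decomposition (illustrative only;
// mul64_model is our own name):
//
//   #include <stdint.h>
//   static uint64_t mul64_model(uint64_t a, uint64_t b) {
//     uint64_t a_lo = (uint32_t)a, a_hi = a >> 32;
//     uint64_t b_lo = (uint32_t)b, b_hi = b >> 32;
//     uint64_t cross = (a_lo * b_hi + a_hi * b_lo) << 32; // pmulld/phaddd/psllq
//     return a_lo * b_lo + cross;                         // pmuludq + paddq
//   }
//
// The a_hi*b_hi term is dropped because it only feeds bits 64 and up,
// which a 64-bit lane cannot hold anyway.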

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
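
// The vcmov rules above synthesize a vector conditional move that AVX/AVX2
// lack as a single instruction: vcmpps/vcmppd first writes an all-ones or
// all-zeros mask into each lane of $dst, then the variable blend picks
// $src2 where the mask's sign bit is set and $src1 elsewhere. Per-lane
// sketch (illustrative only; cmove_lane_model is our own name):
//
//   static double cmove_lane_model(bool lane_cond, double src1, double src2) {
//     // lane_cond models one lane of the vcmppd mask being all ones
//     return lane_cond ? src2 : src1;
//   }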
div packed2F" %} 8790 ins_encode %{ 8791 int vector_len = 0; 8792 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8793 %} 8794 ins_pipe( pipe_slow ); 8795 %} 8796 8797 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 8798 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8799 match(Set dst (DivVF src (LoadVector mem))); 8800 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 8801 ins_encode %{ 8802 int vector_len = 0; 8803 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8804 %} 8805 ins_pipe( pipe_slow ); 8806 %} 8807 8808 instruct vdiv4F(vecX dst, vecX src) %{ 8809 predicate(n->as_Vector()->length() == 4); 8810 match(Set dst (DivVF dst src)); 8811 format %{ "divps $dst,$src\t! div packed4F" %} 8812 ins_encode %{ 8813 __ divps($dst$$XMMRegister, $src$$XMMRegister); 8814 %} 8815 ins_pipe( pipe_slow ); 8816 %} 8817 8818 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 8819 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8820 match(Set dst (DivVF src1 src2)); 8821 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 8822 ins_encode %{ 8823 int vector_len = 0; 8824 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8825 %} 8826 ins_pipe( pipe_slow ); 8827 %} 8828 8829 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 8830 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8831 match(Set dst (DivVF src (LoadVector mem))); 8832 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 8833 ins_encode %{ 8834 int vector_len = 0; 8835 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8836 %} 8837 ins_pipe( pipe_slow ); 8838 %} 8839 8840 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 8841 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8842 match(Set dst (DivVF src1 src2)); 8843 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 8844 ins_encode %{ 8845 int vector_len = 1; 8846 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8847 %} 8848 ins_pipe( pipe_slow ); 8849 %} 8850 8851 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 8852 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 8853 match(Set dst (DivVF src (LoadVector mem))); 8854 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 8855 ins_encode %{ 8856 int vector_len = 1; 8857 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8858 %} 8859 ins_pipe( pipe_slow ); 8860 %} 8861 8862 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 8863 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8864 match(Set dst (DivVF src1 src2)); 8865 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 8866 ins_encode %{ 8867 int vector_len = 2; 8868 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8869 %} 8870 ins_pipe( pipe_slow ); 8871 %} 8872 8873 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 8874 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 8875 match(Set dst (DivVF src (LoadVector mem))); 8876 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 8877 ins_encode %{ 8878 int vector_len = 2; 8879 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8880 %} 8881 ins_pipe( pipe_slow ); 8882 %} 8883 8884 // Doubles vector div 8885 instruct vdiv2D(vecX dst, vecX src) %{ 8886 predicate(n->as_Vector()->length() == 2); 8887 match(Set dst (DivVD dst src)); 8888 format %{ "divpd $dst,$src\t! 
// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t!
sqrt packed2D" %} 8982 ins_encode %{ 8983 int vector_len = 0; 8984 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 8985 %} 8986 ins_pipe( pipe_slow ); 8987 %} 8988 8989 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 8990 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8991 match(Set dst (SqrtVD (LoadVector mem))); 8992 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 8993 ins_encode %{ 8994 int vector_len = 0; 8995 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 8996 %} 8997 ins_pipe( pipe_slow ); 8998 %} 8999 9000 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 9001 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9002 match(Set dst (SqrtVD src)); 9003 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 9004 ins_encode %{ 9005 int vector_len = 1; 9006 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9007 %} 9008 ins_pipe( pipe_slow ); 9009 %} 9010 9011 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 9012 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9013 match(Set dst (SqrtVD (LoadVector mem))); 9014 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 9015 ins_encode %{ 9016 int vector_len = 1; 9017 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 9018 %} 9019 ins_pipe( pipe_slow ); 9020 %} 9021 9022 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 9023 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9024 match(Set dst (SqrtVD src)); 9025 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 9026 ins_encode %{ 9027 int vector_len = 2; 9028 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9029 %} 9030 ins_pipe( pipe_slow ); 9031 %} 9032 9033 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 9034 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9035 match(Set dst (SqrtVD (LoadVector mem))); 9036 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 9037 ins_encode %{ 9038 int vector_len = 2; 9039 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 9040 %} 9041 ins_pipe( pipe_slow ); 9042 %} 9043 9044 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 9045 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9046 match(Set dst (SqrtVF src)); 9047 format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %} 9048 ins_encode %{ 9049 int vector_len = 0; 9050 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9051 %} 9052 ins_pipe( pipe_slow ); 9053 %} 9054 9055 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 9056 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9057 match(Set dst (SqrtVF (LoadVector mem))); 9058 format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} 9059 ins_encode %{ 9060 int vector_len = 0; 9061 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 9062 %} 9063 ins_pipe( pipe_slow ); 9064 %} 9065 9066 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 9067 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9068 match(Set dst (SqrtVF src)); 9069 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 9070 ins_encode %{ 9071 int vector_len = 0; 9072 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9073 %} 9074 ins_pipe( pipe_slow ); 9075 %} 9076 9077 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 9078 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9079 match(Set dst (SqrtVF (LoadVector mem))); 9080 format %{ "vsqrtps $dst,$mem\t! 
sqrt packed4F" %} 9081 ins_encode %{ 9082 int vector_len = 0; 9083 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 9084 %} 9085 ins_pipe( pipe_slow ); 9086 %} 9087 9088 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 9089 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9090 match(Set dst (SqrtVF src)); 9091 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 9092 ins_encode %{ 9093 int vector_len = 1; 9094 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9095 %} 9096 ins_pipe( pipe_slow ); 9097 %} 9098 9099 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 9100 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9101 match(Set dst (SqrtVF (LoadVector mem))); 9102 format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %} 9103 ins_encode %{ 9104 int vector_len = 1; 9105 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 9106 %} 9107 ins_pipe( pipe_slow ); 9108 %} 9109 9110 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 9111 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9112 match(Set dst (SqrtVF src)); 9113 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 9114 ins_encode %{ 9115 int vector_len = 2; 9116 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 9117 %} 9118 ins_pipe( pipe_slow ); 9119 %} 9120 9121 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 9122 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9123 match(Set dst (SqrtVF (LoadVector mem))); 9124 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 9125 ins_encode %{ 9126 int vector_len = 2; 9127 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 9128 %} 9129 ins_pipe( pipe_slow ); 9130 %} 9131 9132 // ------------------------------ LeftShift ----------------------------------- 9133 9134 // Shorts/Chars vector left shift 9135 instruct vsll2S(vecS dst, vecS shift) %{ 9136 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9137 match(Set dst (LShiftVS dst shift)); 9138 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 9139 ins_encode %{ 9140 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 9141 %} 9142 ins_pipe( pipe_slow ); 9143 %} 9144 9145 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 9146 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9147 match(Set dst (LShiftVS dst shift)); 9148 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 9149 ins_encode %{ 9150 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 9151 %} 9152 ins_pipe( pipe_slow ); 9153 %} 9154 9155 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 9156 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9157 match(Set dst (LShiftVS src shift)); 9158 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 9159 ins_encode %{ 9160 int vector_len = 0; 9161 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9162 %} 9163 ins_pipe( pipe_slow ); 9164 %} 9165 9166 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 9167 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9168 match(Set dst (LShiftVS src shift)); 9169 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} 9170 ins_encode %{ 9171 int vector_len = 0; 9172 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9173 %} 9174 ins_pipe( pipe_slow ); 9175 %} 9176 9177 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 9178 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9179 match(Set dst (LShiftVS dst shift)); 9180 effect(TEMP src); 9181 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 9182 ins_encode %{ 9183 int vector_len = 0; 9184 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9185 %} 9186 ins_pipe( pipe_slow ); 9187 %} 9188 9189 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 9190 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9191 match(Set dst (LShiftVS src shift)); 9192 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 9193 ins_encode %{ 9194 int vector_len = 0; 9195 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9196 %} 9197 ins_pipe( pipe_slow ); 9198 %} 9199 9200 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9201 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9202 match(Set dst (LShiftVS src shift)); 9203 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 9204 ins_encode %{ 9205 int vector_len = 0; 9206 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9207 %} 9208 ins_pipe( pipe_slow ); 9209 %} 9210 9211 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9212 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9213 match(Set dst (LShiftVS dst shift)); 9214 effect(TEMP src); 9215 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 9216 ins_encode %{ 9217 int vector_len = 0; 9218 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9219 %} 9220 ins_pipe( pipe_slow ); 9221 %} 9222 9223 instruct vsll4S(vecD dst, vecS shift) %{ 9224 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9225 match(Set dst (LShiftVS dst shift)); 9226 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 9227 ins_encode %{ 9228 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 9229 %} 9230 ins_pipe( pipe_slow ); 9231 %} 9232 9233 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 9234 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9235 match(Set dst (LShiftVS dst shift)); 9236 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 9237 ins_encode %{ 9238 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 9239 %} 9240 ins_pipe( pipe_slow ); 9241 %} 9242 9243 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9244 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9245 match(Set dst (LShiftVS src shift)); 9246 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9247 ins_encode %{ 9248 int vector_len = 0; 9249 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9250 %} 9251 ins_pipe( pipe_slow ); 9252 %} 9253 9254 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9255 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9256 match(Set dst (LShiftVS src shift)); 9257 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 9258 ins_encode %{ 9259 int vector_len = 0; 9260 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9261 %} 9262 ins_pipe( pipe_slow ); 9263 %} 9264 9265 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9266 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9267 match(Set dst (LShiftVS dst shift)); 9268 effect(TEMP src); 9269 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9270 ins_encode %{ 9271 int vector_len = 0; 9272 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9273 %} 9274 ins_pipe( pipe_slow ); 9275 %} 9276 9277 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9278 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9279 match(Set dst (LShiftVS src shift)); 9280 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9281 ins_encode %{ 9282 int vector_len = 0; 9283 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9284 %} 9285 ins_pipe( pipe_slow ); 9286 %} 9287 9288 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9289 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9290 match(Set dst (LShiftVS src shift)); 9291 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9292 ins_encode %{ 9293 int vector_len = 0; 9294 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9295 %} 9296 ins_pipe( pipe_slow ); 9297 %} 9298 9299 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9300 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9301 match(Set dst (LShiftVS dst shift)); 9302 effect(TEMP src); 9303 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 9304 ins_encode %{ 9305 int vector_len = 0; 9306 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9307 %} 9308 ins_pipe( pipe_slow ); 9309 %} 9310 9311 instruct vsll8S(vecX dst, vecS shift) %{ 9312 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9313 match(Set dst (LShiftVS dst shift)); 9314 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 9315 ins_encode %{ 9316 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 9317 %} 9318 ins_pipe( pipe_slow ); 9319 %} 9320 9321 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 9322 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9323 match(Set dst (LShiftVS dst shift)); 9324 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 9325 ins_encode %{ 9326 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 9327 %} 9328 ins_pipe( pipe_slow ); 9329 %} 9330 9331 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9332 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9333 match(Set dst (LShiftVS src shift)); 9334 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9335 ins_encode %{ 9336 int vector_len = 0; 9337 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9338 %} 9339 ins_pipe( pipe_slow ); 9340 %} 9341 9342 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9343 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9344 match(Set dst (LShiftVS src shift)); 9345 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 9346 ins_encode %{ 9347 int vector_len = 0; 9348 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9349 %} 9350 ins_pipe( pipe_slow ); 9351 %} 9352 9353 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9354 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9355 match(Set dst (LShiftVS dst shift)); 9356 effect(TEMP src); 9357 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9358 ins_encode %{ 9359 int vector_len = 0; 9360 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9361 %} 9362 ins_pipe( pipe_slow ); 9363 %} 9364 9365 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9366 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9367 match(Set dst (LShiftVS src shift)); 9368 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9369 ins_encode %{ 9370 int vector_len = 0; 9371 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9372 %} 9373 ins_pipe( pipe_slow ); 9374 %} 9375 9376 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9377 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9378 match(Set dst (LShiftVS src shift)); 9379 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9380 ins_encode %{ 9381 int vector_len = 0; 9382 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9383 %} 9384 ins_pipe( pipe_slow ); 9385 %} 9386 9387 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9388 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9389 match(Set dst (LShiftVS dst shift)); 9390 effect(TEMP src); 9391 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 9392 ins_encode %{ 9393 int vector_len = 0; 9394 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9395 %} 9396 ins_pipe( pipe_slow ); 9397 %} 9398 9399 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9400 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9401 match(Set dst (LShiftVS src shift)); 9402 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9403 ins_encode %{ 9404 int vector_len = 1; 9405 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9406 %} 9407 ins_pipe( pipe_slow ); 9408 %} 9409 9410 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9411 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9412 match(Set dst (LShiftVS src shift)); 9413 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9414 ins_encode %{ 9415 int vector_len = 1; 9416 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9417 %} 9418 ins_pipe( pipe_slow ); 9419 %} 9420 9421 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9422 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9423 match(Set dst (LShiftVS dst shift)); 9424 effect(TEMP src); 9425 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 9426 ins_encode %{ 9427 int vector_len = 1; 9428 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9429 %} 9430 ins_pipe( pipe_slow ); 9431 %} 9432 9433 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9434 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9435 match(Set dst (LShiftVS src shift)); 9436 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9437 ins_encode %{ 9438 int vector_len = 1; 9439 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9440 %} 9441 ins_pipe( pipe_slow ); 9442 %} 9443 9444 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9445 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9446 match(Set dst (LShiftVS src shift)); 9447 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9448 ins_encode %{ 9449 int vector_len = 1; 9450 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9451 %} 9452 ins_pipe( pipe_slow ); 9453 %} 9454 9455 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9456 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9457 match(Set dst (LShiftVS dst shift)); 9458 effect(TEMP src); 9459 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9460 ins_encode %{ 9461 int vector_len = 1; 9462 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9463 %} 9464 ins_pipe( pipe_slow ); 9465 %} 9466 9467 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9468 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9469 match(Set dst (LShiftVS src shift)); 9470 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9471 ins_encode %{ 9472 int vector_len = 2; 9473 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9474 %} 9475 ins_pipe( pipe_slow ); 9476 %} 9477 9478 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9479 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9480 match(Set dst (LShiftVS src shift)); 9481 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9482 ins_encode %{ 9483 int vector_len = 2; 9484 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9485 %} 9486 ins_pipe( pipe_slow ); 9487 %} 9488 9489 // Integers vector left shift 9490 instruct vsll2I(vecD dst, vecS shift) %{ 9491 predicate(n->as_Vector()->length() == 2); 9492 match(Set dst (LShiftVI dst shift)); 9493 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9494 ins_encode %{ 9495 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9496 %} 9497 ins_pipe( pipe_slow ); 9498 %} 9499 9500 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 9501 predicate(n->as_Vector()->length() == 2); 9502 match(Set dst (LShiftVI dst shift)); 9503 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9504 ins_encode %{ 9505 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9506 %} 9507 ins_pipe( pipe_slow ); 9508 %} 9509 9510 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 9511 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9512 match(Set dst (LShiftVI src shift)); 9513 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 9514 ins_encode %{ 9515 int vector_len = 0; 9516 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9517 %} 9518 ins_pipe( pipe_slow ); 9519 %} 9520 9521 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9522 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9523 match(Set dst (LShiftVI src shift)); 9524 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 9525 ins_encode %{ 9526 int vector_len = 0; 9527 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9528 %} 9529 ins_pipe( pipe_slow ); 9530 %} 9531 9532 instruct vsll4I(vecX dst, vecS shift) %{ 9533 predicate(n->as_Vector()->length() == 4); 9534 match(Set dst (LShiftVI dst shift)); 9535 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9536 ins_encode %{ 9537 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9538 %} 9539 ins_pipe( pipe_slow ); 9540 %} 9541 9542 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 9543 predicate(n->as_Vector()->length() == 4); 9544 match(Set dst (LShiftVI dst shift)); 9545 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9546 ins_encode %{ 9547 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9548 %} 9549 ins_pipe( pipe_slow ); 9550 %} 9551 9552 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 9553 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9554 match(Set dst (LShiftVI src shift)); 9555 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9556 ins_encode %{ 9557 int vector_len = 0; 9558 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9559 %} 9560 ins_pipe( pipe_slow ); 9561 %} 9562 9563 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9564 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9565 match(Set dst (LShiftVI src shift)); 9566 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9567 ins_encode %{ 9568 int vector_len = 0; 9569 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9570 %} 9571 ins_pipe( pipe_slow ); 9572 %} 9573 9574 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 9575 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9576 match(Set dst (LShiftVI src shift)); 9577 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9578 ins_encode %{ 9579 int vector_len = 1; 9580 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9581 %} 9582 ins_pipe( pipe_slow ); 9583 %} 9584 9585 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9586 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9587 match(Set dst (LShiftVI src shift)); 9588 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9589 ins_encode %{ 9590 int vector_len = 1; 9591 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9592 %} 9593 ins_pipe( pipe_slow ); 9594 %} 9595 9596 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9597 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9598 match(Set dst (LShiftVI src shift)); 9599 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 9600 ins_encode %{ 9601 int vector_len = 2; 9602 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9603 %} 9604 ins_pipe( pipe_slow ); 9605 %} 9606 9607 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9608 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9609 match(Set dst (LShiftVI src shift)); 9610 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed16I" %} 9611 ins_encode %{ 9612 int vector_len = 2; 9613 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9614 %} 9615 ins_pipe( pipe_slow ); 9616 %} 9617 9618 // Longs vector left shift 9619 instruct vsll2L(vecX dst, vecS shift) %{ 9620 predicate(n->as_Vector()->length() == 2); 9621 match(Set dst (LShiftVL dst shift)); 9622 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9623 ins_encode %{ 9624 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 9625 %} 9626 ins_pipe( pipe_slow ); 9627 %} 9628 9629 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 9630 predicate(n->as_Vector()->length() == 2); 9631 match(Set dst (LShiftVL dst shift)); 9632 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9633 ins_encode %{ 9634 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 9635 %} 9636 ins_pipe( pipe_slow ); 9637 %} 9638 9639 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 9640 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9641 match(Set dst (LShiftVL src shift)); 9642 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9643 ins_encode %{ 9644 int vector_len = 0; 9645 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9646 %} 9647 ins_pipe( pipe_slow ); 9648 %} 9649 9650 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9651 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9652 match(Set dst (LShiftVL src shift)); 9653 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9654 ins_encode %{ 9655 int vector_len = 0; 9656 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9657 %} 9658 ins_pipe( pipe_slow ); 9659 %} 9660 9661 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 9662 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9663 match(Set dst (LShiftVL src shift)); 9664 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9665 ins_encode %{ 9666 int vector_len = 1; 9667 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9668 %} 9669 ins_pipe( pipe_slow ); 9670 %} 9671 9672 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9673 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9674 match(Set dst (LShiftVL src shift)); 9675 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9676 ins_encode %{ 9677 int vector_len = 1; 9678 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9679 %} 9680 ins_pipe( pipe_slow ); 9681 %} 9682 9683 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9684 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9685 match(Set dst (LShiftVL src shift)); 9686 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 9687 ins_encode %{ 9688 int vector_len = 2; 9689 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9690 %} 9691 ins_pipe( pipe_slow ); 9692 %} 9693 9694 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9695 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9696 match(Set dst (LShiftVL src shift)); 9697 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before the shift. But char vectors are fine, since chars
// are unsigned values.
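// A minimal Java sketch of the mismatch (illustrative only; the variable
// names below are made up and not part of the JDK):
//
//   short s = -1;                 // bit pattern 0xFFFF
//   short r = (short)(s >>> 2);   // s is sign-extended to int 0xFFFFFFFF,
//                                 // shifted to 0x3FFFFFFF, then truncated
//                                 // back to 0xFFFF, so r == -1
//
// A packed 16-bit logical shift (psrlw) of 0xFFFF by 2 yields 0x3FFF instead,
// so these URShiftVS rules cannot be used for short data. For chars the
// widening conversion zero-extends, so (char)(c >>> 2) agrees with psrlw.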
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t!
logical right shift packed4S" %} 9869 ins_encode %{ 9870 int vector_len = 0; 9871 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9872 %} 9873 ins_pipe( pipe_slow ); 9874 %} 9875 9876 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9877 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9878 match(Set dst (URShiftVS dst shift)); 9879 effect(TEMP src); 9880 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9881 ins_encode %{ 9882 int vector_len = 0; 9883 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9884 %} 9885 ins_pipe( pipe_slow ); 9886 %} 9887 9888 instruct vsrl8S(vecX dst, vecS shift) %{ 9889 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9890 match(Set dst (URShiftVS dst shift)); 9891 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9892 ins_encode %{ 9893 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9894 %} 9895 ins_pipe( pipe_slow ); 9896 %} 9897 9898 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 9899 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9900 match(Set dst (URShiftVS dst shift)); 9901 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9902 ins_encode %{ 9903 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9904 %} 9905 ins_pipe( pipe_slow ); 9906 %} 9907 9908 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9909 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9910 match(Set dst (URShiftVS src shift)); 9911 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9912 ins_encode %{ 9913 int vector_len = 0; 9914 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9915 %} 9916 ins_pipe( pipe_slow ); 9917 %} 9918 9919 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9920 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9921 match(Set dst (URShiftVS src shift)); 9922 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9923 ins_encode %{ 9924 int vector_len = 0; 9925 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9926 %} 9927 ins_pipe( pipe_slow ); 9928 %} 9929 9930 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9931 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9932 match(Set dst (URShiftVS dst shift)); 9933 effect(TEMP src); 9934 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9935 ins_encode %{ 9936 int vector_len = 0; 9937 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9938 %} 9939 ins_pipe( pipe_slow ); 9940 %} 9941 9942 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9943 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9944 match(Set dst (URShiftVS src shift)); 9945 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9946 ins_encode %{ 9947 int vector_len = 0; 9948 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9949 %} 9950 ins_pipe( pipe_slow ); 9951 %} 9952 9953 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9954 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9955 match(Set dst (URShiftVS src shift)); 9956 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 9957 ins_encode %{ 9958 int vector_len = 0; 9959 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9960 %} 9961 ins_pipe( pipe_slow ); 9962 %} 9963 9964 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9965 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9966 match(Set dst (URShiftVS dst shift)); 9967 effect(TEMP src); 9968 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9969 ins_encode %{ 9970 int vector_len = 0; 9971 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9972 %} 9973 ins_pipe( pipe_slow ); 9974 %} 9975 9976 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9977 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9978 match(Set dst (URShiftVS src shift)); 9979 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9980 ins_encode %{ 9981 int vector_len = 1; 9982 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9983 %} 9984 ins_pipe( pipe_slow ); 9985 %} 9986 9987 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9988 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9989 match(Set dst (URShiftVS src shift)); 9990 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9991 ins_encode %{ 9992 int vector_len = 1; 9993 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9994 %} 9995 ins_pipe( pipe_slow ); 9996 %} 9997 9998 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9999 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 10000 match(Set dst (URShiftVS dst shift)); 10001 effect(TEMP src); 10002 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 10003 ins_encode %{ 10004 int vector_len = 1; 10005 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10006 %} 10007 ins_pipe( pipe_slow ); 10008 %} 10009 10010 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 10011 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 10012 match(Set dst (URShiftVS src shift)); 10013 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 10014 ins_encode %{ 10015 int vector_len = 1; 10016 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10017 %} 10018 ins_pipe( pipe_slow ); 10019 %} 10020 10021 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 10022 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 10023 match(Set dst (URShiftVS src shift)); 10024 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 10025 ins_encode %{ 10026 int vector_len = 1; 10027 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10028 %} 10029 ins_pipe( pipe_slow ); 10030 %} 10031 10032 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 10033 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 10034 match(Set dst (URShiftVS dst shift)); 10035 effect(TEMP src); 10036 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 10037 ins_encode %{ 10038 int vector_len = 1; 10039 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10040 %} 10041 ins_pipe( pipe_slow ); 10042 %} 10043 10044 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 10045 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 10046 match(Set dst (URShiftVS src shift)); 10047 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 10048 ins_encode %{ 10049 int vector_len = 2; 10050 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10051 %} 10052 ins_pipe( pipe_slow ); 10053 %} 10054 10055 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10056 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 10057 match(Set dst (URShiftVS src shift)); 10058 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 10059 ins_encode %{ 10060 int vector_len = 2; 10061 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10062 %} 10063 ins_pipe( pipe_slow ); 10064 %} 10065 10066 // Integers vector logical right shift 10067 instruct vsrl2I(vecD dst, vecS shift) %{ 10068 predicate(n->as_Vector()->length() == 2); 10069 match(Set dst (URShiftVI dst shift)); 10070 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 10071 ins_encode %{ 10072 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 10073 %} 10074 ins_pipe( pipe_slow ); 10075 %} 10076 10077 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 10078 predicate(n->as_Vector()->length() == 2); 10079 match(Set dst (URShiftVI dst shift)); 10080 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 10081 ins_encode %{ 10082 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 10083 %} 10084 ins_pipe( pipe_slow ); 10085 %} 10086 10087 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 10088 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10089 match(Set dst (URShiftVI src shift)); 10090 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 10091 ins_encode %{ 10092 int vector_len = 0; 10093 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10094 %} 10095 ins_pipe( pipe_slow ); 10096 %} 10097 10098 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 10099 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10100 match(Set dst (URShiftVI src shift)); 10101 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 10102 ins_encode %{ 10103 int vector_len = 0; 10104 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10105 %} 10106 ins_pipe( pipe_slow ); 10107 %} 10108 10109 instruct vsrl4I(vecX dst, vecS shift) %{ 10110 predicate(n->as_Vector()->length() == 4); 10111 match(Set dst (URShiftVI dst shift)); 10112 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 10113 ins_encode %{ 10114 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 10115 %} 10116 ins_pipe( pipe_slow ); 10117 %} 10118 10119 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 10120 predicate(n->as_Vector()->length() == 4); 10121 match(Set dst (URShiftVI dst shift)); 10122 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 10123 ins_encode %{ 10124 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 10125 %} 10126 ins_pipe( pipe_slow ); 10127 %} 10128 10129 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 10130 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10131 match(Set dst (URShiftVI src shift)); 10132 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 10133 ins_encode %{ 10134 int vector_len = 0; 10135 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10136 %} 10137 ins_pipe( pipe_slow ); 10138 %} 10139 10140 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 10141 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10142 match(Set dst (URShiftVI src shift)); 10143 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 10144 ins_encode %{ 10145 int vector_len = 0; 10146 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10147 %} 10148 ins_pipe( pipe_slow ); 10149 %} 10150 10151 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 10152 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10153 match(Set dst (URShiftVI src shift)); 10154 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 10155 ins_encode %{ 10156 int vector_len = 1; 10157 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10158 %} 10159 ins_pipe( pipe_slow ); 10160 %} 10161 10162 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 10163 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10164 match(Set dst (URShiftVI src shift)); 10165 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 10166 ins_encode %{ 10167 int vector_len = 1; 10168 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10169 %} 10170 ins_pipe( pipe_slow ); 10171 %} 10172 10173 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 10174 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10175 match(Set dst (URShiftVI src shift)); 10176 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 10177 ins_encode %{ 10178 int vector_len = 2; 10179 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10180 %} 10181 ins_pipe( pipe_slow ); 10182 %} 10183 10184 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10185 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10186 match(Set dst (URShiftVI src shift)); 10187 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 10188 ins_encode %{ 10189 int vector_len = 2; 10190 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10191 %} 10192 ins_pipe( pipe_slow ); 10193 %} 10194 10195 // Longs vector logical right shift 10196 instruct vsrl2L(vecX dst, vecS shift) %{ 10197 predicate(n->as_Vector()->length() == 2); 10198 match(Set dst (URShiftVL dst shift)); 10199 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 10200 ins_encode %{ 10201 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 10202 %} 10203 ins_pipe( pipe_slow ); 10204 %} 10205 10206 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 10207 predicate(n->as_Vector()->length() == 2); 10208 match(Set dst (URShiftVL dst shift)); 10209 format %{ "psrlq $dst,$shift\t! 
logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t!
arithmetic right shift packed2S" %} 10299 ins_encode %{ 10300 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 10301 %} 10302 ins_pipe( pipe_slow ); 10303 %} 10304 10305 instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 10306 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 10307 match(Set dst (RShiftVS src shift)); 10308 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10309 ins_encode %{ 10310 int vector_len = 0; 10311 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10312 %} 10313 ins_pipe( pipe_slow ); 10314 %} 10315 10316 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 10317 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 10318 match(Set dst (RShiftVS src shift)); 10319 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10320 ins_encode %{ 10321 int vector_len = 0; 10322 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10323 %} 10324 ins_pipe( pipe_slow ); 10325 %} 10326 10327 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 10328 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 10329 match(Set dst (RShiftVS dst shift)); 10330 effect(TEMP src); 10331 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10332 ins_encode %{ 10333 int vector_len = 0; 10334 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10335 %} 10336 ins_pipe( pipe_slow ); 10337 %} 10338 10339 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 10340 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 10341 match(Set dst (RShiftVS src shift)); 10342 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10343 ins_encode %{ 10344 int vector_len = 0; 10345 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10346 %} 10347 ins_pipe( pipe_slow ); 10348 %} 10349 10350 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 10351 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 10352 match(Set dst (RShiftVS src shift)); 10353 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10354 ins_encode %{ 10355 int vector_len = 0; 10356 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10357 %} 10358 ins_pipe( pipe_slow ); 10359 %} 10360 10361 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 10362 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 10363 match(Set dst (RShiftVS dst shift)); 10364 effect(TEMP src); 10365 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 10366 ins_encode %{ 10367 int vector_len = 0; 10368 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10369 %} 10370 ins_pipe( pipe_slow ); 10371 %} 10372 10373 instruct vsra4S(vecD dst, vecS shift) %{ 10374 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 10375 match(Set dst (RShiftVS dst shift)); 10376 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed4S" %} 10377 ins_encode %{ 10378 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 10379 %} 10380 ins_pipe( pipe_slow ); 10381 %} 10382 10383 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 10384 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 10385 match(Set dst (RShiftVS dst shift)); 10386 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 10387 ins_encode %{ 10388 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 10389 %} 10390 ins_pipe( pipe_slow ); 10391 %} 10392 10393 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 10394 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 10395 match(Set dst (RShiftVS src shift)); 10396 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10397 ins_encode %{ 10398 int vector_len = 0; 10399 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10400 %} 10401 ins_pipe( pipe_slow ); 10402 %} 10403 10404 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 10405 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 10406 match(Set dst (RShiftVS src shift)); 10407 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10408 ins_encode %{ 10409 int vector_len = 0; 10410 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10411 %} 10412 ins_pipe( pipe_slow ); 10413 %} 10414 10415 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 10416 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 10417 match(Set dst (RShiftVS dst shift)); 10418 effect(TEMP src); 10419 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10420 ins_encode %{ 10421 int vector_len = 0; 10422 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10423 %} 10424 ins_pipe( pipe_slow ); 10425 %} 10426 10427 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 10428 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 10429 match(Set dst (RShiftVS src shift)); 10430 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10431 ins_encode %{ 10432 int vector_len = 0; 10433 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10434 %} 10435 ins_pipe( pipe_slow ); 10436 %} 10437 10438 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 10439 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 10440 match(Set dst (RShiftVS src shift)); 10441 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10442 ins_encode %{ 10443 int vector_len = 0; 10444 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10445 %} 10446 ins_pipe( pipe_slow ); 10447 %} 10448 10449 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 10450 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 10451 match(Set dst (RShiftVS dst shift)); 10452 effect(TEMP src); 10453 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %} 10454 ins_encode %{ 10455 int vector_len = 0; 10456 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10457 %} 10458 ins_pipe( pipe_slow ); 10459 %} 10460 10461 instruct vsra8S(vecX dst, vecS shift) %{ 10462 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 10463 match(Set dst (RShiftVS dst shift)); 10464 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 10465 ins_encode %{ 10466 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 10467 %} 10468 ins_pipe( pipe_slow ); 10469 %} 10470 10471 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 10472 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 10473 match(Set dst (RShiftVS dst shift)); 10474 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 10475 ins_encode %{ 10476 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 10477 %} 10478 ins_pipe( pipe_slow ); 10479 %} 10480 10481 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 10482 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 10483 match(Set dst (RShiftVS src shift)); 10484 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10485 ins_encode %{ 10486 int vector_len = 0; 10487 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10488 %} 10489 ins_pipe( pipe_slow ); 10490 %} 10491 10492 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 10493 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 10494 match(Set dst (RShiftVS src shift)); 10495 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10496 ins_encode %{ 10497 int vector_len = 0; 10498 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10499 %} 10500 ins_pipe( pipe_slow ); 10501 %} 10502 10503 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 10504 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 10505 match(Set dst (RShiftVS dst shift)); 10506 effect(TEMP src); 10507 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10508 ins_encode %{ 10509 int vector_len = 0; 10510 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10511 %} 10512 ins_pipe( pipe_slow ); 10513 %} 10514 10515 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 10516 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 10517 match(Set dst (RShiftVS src shift)); 10518 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10519 ins_encode %{ 10520 int vector_len = 0; 10521 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10522 %} 10523 ins_pipe( pipe_slow ); 10524 %} 10525 10526 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 10527 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 10528 match(Set dst (RShiftVS src shift)); 10529 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed8S" %} 10530 ins_encode %{ 10531 int vector_len = 0; 10532 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10533 %} 10534 ins_pipe( pipe_slow ); 10535 %} 10536 10537 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 10538 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 10539 match(Set dst (RShiftVS dst shift)); 10540 effect(TEMP src); 10541 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10542 ins_encode %{ 10543 int vector_len = 0; 10544 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10545 %} 10546 ins_pipe( pipe_slow ); 10547 %} 10548 10549 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 10550 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 10551 match(Set dst (RShiftVS src shift)); 10552 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10553 ins_encode %{ 10554 int vector_len = 1; 10555 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10556 %} 10557 ins_pipe( pipe_slow ); 10558 %} 10559 10560 instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 10561 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 10562 match(Set dst (RShiftVS src shift)); 10563 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10564 ins_encode %{ 10565 int vector_len = 1; 10566 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10567 %} 10568 ins_pipe( pipe_slow ); 10569 %} 10570 10571 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 10572 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 10573 match(Set dst (RShiftVS dst shift)); 10574 effect(TEMP src); 10575 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10576 ins_encode %{ 10577 int vector_len = 1; 10578 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10579 %} 10580 ins_pipe( pipe_slow ); 10581 %} 10582 10583 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 10584 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 10585 match(Set dst (RShiftVS src shift)); 10586 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10587 ins_encode %{ 10588 int vector_len = 1; 10589 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10590 %} 10591 ins_pipe( pipe_slow ); 10592 %} 10593 10594 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 10595 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 10596 match(Set dst (RShiftVS src shift)); 10597 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10598 ins_encode %{ 10599 int vector_len = 1; 10600 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10601 %} 10602 ins_pipe( pipe_slow ); 10603 %} 10604 10605 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 10606 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 10607 match(Set dst (RShiftVS dst shift)); 10608 effect(TEMP src); 10609 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} 10610 ins_encode %{ 10611 int vector_len = 1; 10612 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10613 %} 10614 ins_pipe( pipe_slow ); 10615 %} 10616 10617 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ 10618 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 10619 match(Set dst (RShiftVS src shift)); 10620 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 10621 ins_encode %{ 10622 int vector_len = 2; 10623 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10624 %} 10625 ins_pipe( pipe_slow ); 10626 %} 10627 10628 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10629 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 10630 match(Set dst (RShiftVS src shift)); 10631 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 10632 ins_encode %{ 10633 int vector_len = 2; 10634 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10635 %} 10636 ins_pipe( pipe_slow ); 10637 %} 10638 10639 // Integers vector arithmetic right shift 10640 instruct vsra2I(vecD dst, vecS shift) %{ 10641 predicate(n->as_Vector()->length() == 2); 10642 match(Set dst (RShiftVI dst shift)); 10643 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10644 ins_encode %{ 10645 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10646 %} 10647 ins_pipe( pipe_slow ); 10648 %} 10649 10650 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 10651 predicate(n->as_Vector()->length() == 2); 10652 match(Set dst (RShiftVI dst shift)); 10653 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10654 ins_encode %{ 10655 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10656 %} 10657 ins_pipe( pipe_slow ); 10658 %} 10659 10660 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 10661 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10662 match(Set dst (RShiftVI src shift)); 10663 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10664 ins_encode %{ 10665 int vector_len = 0; 10666 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10667 %} 10668 ins_pipe( pipe_slow ); 10669 %} 10670 10671 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 10672 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10673 match(Set dst (RShiftVI src shift)); 10674 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10675 ins_encode %{ 10676 int vector_len = 0; 10677 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10678 %} 10679 ins_pipe( pipe_slow ); 10680 %} 10681 10682 instruct vsra4I(vecX dst, vecS shift) %{ 10683 predicate(n->as_Vector()->length() == 4); 10684 match(Set dst (RShiftVI dst shift)); 10685 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10686 ins_encode %{ 10687 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10688 %} 10689 ins_pipe( pipe_slow ); 10690 %} 10691 10692 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 10693 predicate(n->as_Vector()->length() == 4); 10694 match(Set dst (RShiftVI dst shift)); 10695 format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed4I" %} 10696 ins_encode %{ 10697 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10698 %} 10699 ins_pipe( pipe_slow ); 10700 %} 10701 10702 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 10703 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10704 match(Set dst (RShiftVI src shift)); 10705 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10706 ins_encode %{ 10707 int vector_len = 0; 10708 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10709 %} 10710 ins_pipe( pipe_slow ); 10711 %} 10712 10713 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 10714 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10715 match(Set dst (RShiftVI src shift)); 10716 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10717 ins_encode %{ 10718 int vector_len = 0; 10719 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10720 %} 10721 ins_pipe( pipe_slow ); 10722 %} 10723 10724 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 10725 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10726 match(Set dst (RShiftVI src shift)); 10727 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10728 ins_encode %{ 10729 int vector_len = 1; 10730 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10731 %} 10732 ins_pipe( pipe_slow ); 10733 %} 10734 10735 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 10736 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10737 match(Set dst (RShiftVI src shift)); 10738 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10739 ins_encode %{ 10740 int vector_len = 1; 10741 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10742 %} 10743 ins_pipe( pipe_slow ); 10744 %} 10745 10746 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ 10747 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10748 match(Set dst (RShiftVI src shift)); 10749 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10750 ins_encode %{ 10751 int vector_len = 2; 10752 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10753 %} 10754 ins_pipe( pipe_slow ); 10755 %} 10756 10757 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10758 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10759 match(Set dst (RShiftVI src shift)); 10760 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10761 ins_encode %{ 10762 int vector_len = 2; 10763 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10764 %} 10765 ins_pipe( pipe_slow ); 10766 %} 10767 10768 // There are no longs vector arithmetic right shift instructions. 10769 10770 10771 // --------------------------------- AND -------------------------------------- 10772 10773 instruct vand4B(vecS dst, vecS src) %{ 10774 predicate(n->as_Vector()->length_in_bytes() == 4); 10775 match(Set dst (AndV dst src)); 10776 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 10777 ins_encode %{ 10778 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10779 %} 10780 ins_pipe( pipe_slow ); 10781 %} 10782 10783 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 10784 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10785 match(Set dst (AndV src1 src2)); 10786 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (4 bytes)" %} 10787 ins_encode %{ 10788 int vector_len = 0; 10789 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10790 %} 10791 ins_pipe( pipe_slow ); 10792 %} 10793 10794 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 10795 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10796 match(Set dst (AndV src (LoadVector mem))); 10797 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 10798 ins_encode %{ 10799 int vector_len = 0; 10800 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10801 %} 10802 ins_pipe( pipe_slow ); 10803 %} 10804 10805 instruct vand8B(vecD dst, vecD src) %{ 10806 predicate(n->as_Vector()->length_in_bytes() == 8); 10807 match(Set dst (AndV dst src)); 10808 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 10809 ins_encode %{ 10810 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10811 %} 10812 ins_pipe( pipe_slow ); 10813 %} 10814 10815 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 10816 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10817 match(Set dst (AndV src1 src2)); 10818 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 10819 ins_encode %{ 10820 int vector_len = 0; 10821 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10822 %} 10823 ins_pipe( pipe_slow ); 10824 %} 10825 10826 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 10827 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10828 match(Set dst (AndV src (LoadVector mem))); 10829 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 10830 ins_encode %{ 10831 int vector_len = 0; 10832 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10833 %} 10834 ins_pipe( pipe_slow ); 10835 %} 10836 10837 instruct vand16B(vecX dst, vecX src) %{ 10838 predicate(n->as_Vector()->length_in_bytes() == 16); 10839 match(Set dst (AndV dst src)); 10840 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 10841 ins_encode %{ 10842 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10843 %} 10844 ins_pipe( pipe_slow ); 10845 %} 10846 10847 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 10848 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10849 match(Set dst (AndV src1 src2)); 10850 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 10851 ins_encode %{ 10852 int vector_len = 0; 10853 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10854 %} 10855 ins_pipe( pipe_slow ); 10856 %} 10857 10858 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 10859 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10860 match(Set dst (AndV src (LoadVector mem))); 10861 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 10862 ins_encode %{ 10863 int vector_len = 0; 10864 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10865 %} 10866 ins_pipe( pipe_slow ); 10867 %} 10868 10869 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 10870 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10871 match(Set dst (AndV src1 src2)); 10872 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (32 bytes)" %} 10873 ins_encode %{ 10874 int vector_len = 1; 10875 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10876 %} 10877 ins_pipe( pipe_slow ); 10878 %} 10879 10880 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 10881 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10882 match(Set dst (AndV src (LoadVector mem))); 10883 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 10884 ins_encode %{ 10885 int vector_len = 1; 10886 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10887 %} 10888 ins_pipe( pipe_slow ); 10889 %} 10890 10891 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10892 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10893 match(Set dst (AndV src1 src2)); 10894 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} 10895 ins_encode %{ 10896 int vector_len = 2; 10897 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10898 %} 10899 ins_pipe( pipe_slow ); 10900 %} 10901 10902 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ 10903 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 10904 match(Set dst (AndV src (LoadVector mem))); 10905 format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %} 10906 ins_encode %{ 10907 int vector_len = 2; 10908 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10909 %} 10910 ins_pipe( pipe_slow ); 10911 %} 10912 10913 // --------------------------------- OR --------------------------------------- 10914 10915 instruct vor4B(vecS dst, vecS src) %{ 10916 predicate(n->as_Vector()->length_in_bytes() == 4); 10917 match(Set dst (OrV dst src)); 10918 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 10919 ins_encode %{ 10920 __ por($dst$$XMMRegister, $src$$XMMRegister); 10921 %} 10922 ins_pipe( pipe_slow ); 10923 %} 10924 10925 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 10926 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10927 match(Set dst (OrV src1 src2)); 10928 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 10929 ins_encode %{ 10930 int vector_len = 0; 10931 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10932 %} 10933 ins_pipe( pipe_slow ); 10934 %} 10935 10936 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 10937 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10938 match(Set dst (OrV src (LoadVector mem))); 10939 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 10940 ins_encode %{ 10941 int vector_len = 0; 10942 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10943 %} 10944 ins_pipe( pipe_slow ); 10945 %} 10946 10947 instruct vor8B(vecD dst, vecD src) %{ 10948 predicate(n->as_Vector()->length_in_bytes() == 8); 10949 match(Set dst (OrV dst src)); 10950 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 10951 ins_encode %{ 10952 __ por($dst$$XMMRegister, $src$$XMMRegister); 10953 %} 10954 ins_pipe( pipe_slow ); 10955 %} 10956 10957 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 10958 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10959 match(Set dst (OrV src1 src2)); 10960 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (8 bytes)" %} 10961 ins_encode %{ 10962 int vector_len = 0; 10963 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10964 %} 10965 ins_pipe( pipe_slow ); 10966 %} 10967 10968 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ 10969 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10970 match(Set dst (OrV src (LoadVector mem))); 10971 format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} 10972 ins_encode %{ 10973 int vector_len = 0; 10974 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10975 %} 10976 ins_pipe( pipe_slow ); 10977 %} 10978 10979 instruct vor16B(vecX dst, vecX src) %{ 10980 predicate(n->as_Vector()->length_in_bytes() == 16); 10981 match(Set dst (OrV dst src)); 10982 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 10983 ins_encode %{ 10984 __ por($dst$$XMMRegister, $src$$XMMRegister); 10985 %} 10986 ins_pipe( pipe_slow ); 10987 %} 10988 10989 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 10990 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10991 match(Set dst (OrV src1 src2)); 10992 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 10993 ins_encode %{ 10994 int vector_len = 0; 10995 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10996 %} 10997 ins_pipe( pipe_slow ); 10998 %} 10999 11000 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 11001 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 11002 match(Set dst (OrV src (LoadVector mem))); 11003 format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %} 11004 ins_encode %{ 11005 int vector_len = 0; 11006 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11007 %} 11008 ins_pipe( pipe_slow ); 11009 %} 11010 11011 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 11012 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 11013 match(Set dst (OrV src1 src2)); 11014 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 11015 ins_encode %{ 11016 int vector_len = 1; 11017 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11018 %} 11019 ins_pipe( pipe_slow ); 11020 %} 11021 11022 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 11023 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 11024 match(Set dst (OrV src (LoadVector mem))); 11025 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 11026 ins_encode %{ 11027 int vector_len = 1; 11028 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11029 %} 11030 ins_pipe( pipe_slow ); 11031 %} 11032 11033 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 11034 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 11035 match(Set dst (OrV src1 src2)); 11036 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 11037 ins_encode %{ 11038 int vector_len = 2; 11039 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11040 %} 11041 ins_pipe( pipe_slow ); 11042 %} 11043 11044 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 11045 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 11046 match(Set dst (OrV src (LoadVector mem))); 11047 format %{ "vpor $dst,$src,$mem\t! 
or vectors (64 bytes)" %} 11048 ins_encode %{ 11049 int vector_len = 2; 11050 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11051 %} 11052 ins_pipe( pipe_slow ); 11053 %} 11054 11055 // --------------------------------- XOR -------------------------------------- 11056 11057 instruct vxor4B(vecS dst, vecS src) %{ 11058 predicate(n->as_Vector()->length_in_bytes() == 4); 11059 match(Set dst (XorV dst src)); 11060 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 11061 ins_encode %{ 11062 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 11063 %} 11064 ins_pipe( pipe_slow ); 11065 %} 11066 11067 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 11068 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 11069 match(Set dst (XorV src1 src2)); 11070 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 11071 ins_encode %{ 11072 int vector_len = 0; 11073 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11074 %} 11075 ins_pipe( pipe_slow ); 11076 %} 11077 11078 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 11079 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 11080 match(Set dst (XorV src (LoadVector mem))); 11081 format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} 11082 ins_encode %{ 11083 int vector_len = 0; 11084 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11085 %} 11086 ins_pipe( pipe_slow ); 11087 %} 11088 11089 instruct vxor8B(vecD dst, vecD src) %{ 11090 predicate(n->as_Vector()->length_in_bytes() == 8); 11091 match(Set dst (XorV dst src)); 11092 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 11093 ins_encode %{ 11094 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 11095 %} 11096 ins_pipe( pipe_slow ); 11097 %} 11098 11099 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 11100 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 11101 match(Set dst (XorV src1 src2)); 11102 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 11103 ins_encode %{ 11104 int vector_len = 0; 11105 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11106 %} 11107 ins_pipe( pipe_slow ); 11108 %} 11109 11110 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 11111 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 11112 match(Set dst (XorV src (LoadVector mem))); 11113 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 11114 ins_encode %{ 11115 int vector_len = 0; 11116 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11117 %} 11118 ins_pipe( pipe_slow ); 11119 %} 11120 11121 instruct vxor16B(vecX dst, vecX src) %{ 11122 predicate(n->as_Vector()->length_in_bytes() == 16); 11123 match(Set dst (XorV dst src)); 11124 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 11125 ins_encode %{ 11126 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 11127 %} 11128 ins_pipe( pipe_slow ); 11129 %} 11130 11131 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 11132 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 11133 match(Set dst (XorV src1 src2)); 11134 format %{ "vpxor $dst,$src1,$src2\t! 
xor vectors (16 bytes)" %} 11135 ins_encode %{ 11136 int vector_len = 0; 11137 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11138 %} 11139 ins_pipe( pipe_slow ); 11140 %} 11141 11142 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 11143 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 11144 match(Set dst (XorV src (LoadVector mem))); 11145 format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %} 11146 ins_encode %{ 11147 int vector_len = 0; 11148 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11149 %} 11150 ins_pipe( pipe_slow ); 11151 %} 11152 11153 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 11154 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 11155 match(Set dst (XorV src1 src2)); 11156 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 11157 ins_encode %{ 11158 int vector_len = 1; 11159 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11160 %} 11161 ins_pipe( pipe_slow ); 11162 %} 11163 11164 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 11165 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 11166 match(Set dst (XorV src (LoadVector mem))); 11167 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 11168 ins_encode %{ 11169 int vector_len = 1; 11170 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11171 %} 11172 ins_pipe( pipe_slow ); 11173 %} 11174 11175 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 11176 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 11177 match(Set dst (XorV src1 src2)); 11178 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} 11179 ins_encode %{ 11180 int vector_len = 2; 11181 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11182 %} 11183 ins_pipe( pipe_slow ); 11184 %} 11185 11186 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ 11187 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 11188 match(Set dst (XorV src (LoadVector mem))); 11189 format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %} 11190 ins_encode %{ 11191 int vector_len = 2; 11192 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11193 %} 11194 ins_pipe( pipe_slow ); 11195 %} 11196 11197 instruct vcvt2Fto2D_reg(vecX dst, vecD src) %{ 11198 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 11199 match(Set dst (ConvertVF2VD src)); 11200 format %{ "vcvtps2pd $dst,$src\t! convert 2F to 2D vector" %} 11201 ins_encode %{ 11202 int vector_len = 0; 11203 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11204 %} 11205 ins_pipe( pipe_slow ); 11206 %} 11207 11208 instruct vcvt4Fto4D_reg(vecY dst, vecX src) %{ 11209 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32); 11210 match(Set dst (ConvertVF2VD src)); 11211 format %{ "vcvtps2pd $dst,$src\t! convert 4F to 4D vector" %} 11212 ins_encode %{ 11213 int vector_len = 1; 11214 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11215 %} 11216 ins_pipe( pipe_slow ); 11217 %} 11218 11219 instruct vcvt8Fto4D_reg(vecY dst, vecY src) %{ 11220 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32); 11221 match(Set dst (ConvertVF2VD src)); 11222 format %{ "vcvtps2pd $dst,$src\t! 
convert 8F to 4D vector" %} 11223 ins_encode %{ 11224 int vector_len = 1; 11225 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11226 %} 11227 ins_pipe( pipe_slow ); 11228 %} 11229 11230 instruct vcvt8Fto8D_reg(vecZ dst, vecY src) %{ 11231 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 11232 match(Set dst (ConvertVF2VD src)); 11233 format %{ "evcvtps2pd $dst,$src\t! convert 8F to 8D vector" %} 11234 ins_encode %{ 11235 int vector_len = 2; 11236 __ evcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11237 %} 11238 ins_pipe( pipe_slow ); 11239 %} 11240 11241 instruct vcmpeq2F(vecD dst, vecD src1, vecD src2) %{ 11242 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11243 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11244 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11245 match(Set dst (VectorMaskCmp src1 src2)); 11246 format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed2F" %} 11247 ins_encode %{ 11248 int vector_len = 0; 11249 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 11250 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11251 %} 11252 ins_pipe( pipe_slow ); 11253 %} 11254 11255 instruct vcmpeq4F(vecX dst, vecX src1, vecX src2) %{ 11256 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11257 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11258 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11259 match(Set dst (VectorMaskCmp src1 src2)); 11260 format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed4F" %} 11261 ins_encode %{ 11262 int vector_len = 0; 11263 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 11264 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11265 %} 11266 ins_pipe( pipe_slow ); 11267 %} 11268 11269 instruct vcmpeq8F(vecY dst, vecY src1, vecY src2) %{ 11270 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 11271 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11272 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11273 match(Set dst (VectorMaskCmp src1 src2)); 11274 format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed8F" %} 11275 ins_encode %{ 11276 int vector_len = 1; 11277 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 11278 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11279 %} 11280 ins_pipe( pipe_slow ); 11281 %} 11282 11283 instruct vcmpeq16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11284 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 11285 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11286 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11287 match(Set dst (VectorMaskCmp src1 src2)); 11288 effect(TEMP dst, TEMP scratch); 11289 format %{ "vcmpeqps k2,$src1,$src2\n\t" 11290 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16F" %} 11291 ins_encode %{ 11292 int vector_len = 2; 11293 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 11294 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11295 KRegister mask = k0; // The comparison itself is not being masked. 
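    // The EVEX compare below leaves one predicate bit per float lane in ktmp; the
    // masked move that follows (shown as vmovdqu8 in the format string, emitted via
    // evmovdqul with merge == false, i.e. zeroing) then copies an all-ones constant
    // under that mask, materializing -1 in each true lane and 0 in each false lane.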
11296 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11297 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11298 %} 11299 ins_pipe( pipe_slow ); 11300 %} 11301 11302 instruct vcmplt2F(vecD dst, vecD src1, vecD src2) %{ 11303 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11304 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 11305 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11306 match(Set dst (VectorMaskCmp src1 src2)); 11307 format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed2F" %} 11308 ins_encode %{ 11309 int vector_len = 0; 11310 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 11311 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11312 %} 11313 ins_pipe( pipe_slow ); 11314 %} 11315 11316 instruct vcmplt4F(vecX dst, vecX src1, vecX src2) %{ 11317 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11318 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 11319 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11320 match(Set dst (VectorMaskCmp src1 src2)); 11321 format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed4F" %} 11322 ins_encode %{ 11323 int vector_len = 0; 11324 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 11325 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11326 %} 11327 ins_pipe( pipe_slow ); 11328 %} 11329 11330 instruct vcmplt8F(vecY dst, vecY src1, vecY src2) %{ 11331 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 11332 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 11333 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11334 match(Set dst (VectorMaskCmp src1 src2)); 11335 format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed8F" %} 11336 ins_encode %{ 11337 int vector_len = 1; 11338 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 11339 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11340 %} 11341 ins_pipe( pipe_slow ); 11342 %} 11343 11344 instruct vcmplt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11345 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 11346 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 11347 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11348 match(Set dst (VectorMaskCmp src1 src2)); 11349 effect(TEMP dst, TEMP scratch); 11350 format %{ "vcmpltps k2,$src1,$src2\n\t" 11351 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16F" %} 11352 ins_encode %{ 11353 int vector_len = 2; 11354 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 11355 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11356 KRegister mask = k0; // The comparison itself is not being masked. 
11357 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11358 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11359 %} 11360 ins_pipe( pipe_slow ); 11361 %} 11362 11363 instruct vcmpgt2F(vecD dst, vecD src1, vecD src2) %{ 11364 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11365 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 11366 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11367 match(Set dst (VectorMaskCmp src1 src2)); 11368 format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed2F" %} 11369 ins_encode %{ 11370 int vector_len = 0; 11371 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 11372 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11373 %} 11374 ins_pipe( pipe_slow ); 11375 %} 11376 11377 instruct vcmpgt4F(vecX dst, vecX src1, vecX src2) %{ 11378 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11379 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 11380 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11381 match(Set dst (VectorMaskCmp src1 src2)); 11382 format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed4F" %} 11383 ins_encode %{ 11384 int vector_len = 0; 11385 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 11386 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11387 %} 11388 ins_pipe( pipe_slow ); 11389 %} 11390 11391 instruct vcmpgt8F(vecY dst, vecY src1, vecY src2) %{ 11392 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 11393 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 11394 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11395 match(Set dst (VectorMaskCmp src1 src2)); 11396 format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed8F" %} 11397 ins_encode %{ 11398 int vector_len = 1; 11399 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 11400 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11401 %} 11402 ins_pipe( pipe_slow ); 11403 %} 11404 11405 instruct vcmpgt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11406 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 11407 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 11408 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11409 match(Set dst (VectorMaskCmp src1 src2)); 11410 effect(TEMP dst, TEMP scratch); 11411 format %{ "vcmpgtps k2,$src1,$src2\n\t" 11412 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16F" %} 11413 ins_encode %{ 11414 int vector_len = 2; 11415 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 11416 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11417 KRegister mask = k0; // The comparison itself is not being masked. 
11418 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11419 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11420 %} 11421 ins_pipe( pipe_slow ); 11422 %} 11423 11424 instruct vcmpge2F(vecD dst, vecD src1, vecD src2) %{ 11425 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11426 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 11427 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11428 match(Set dst (VectorMaskCmp src1 src2)); 11429 format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed2F" %} 11430 ins_encode %{ 11431 int vector_len = 0; 11432 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 11433 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11434 %} 11435 ins_pipe( pipe_slow ); 11436 %} 11437 11438 instruct vcmpge4F(vecX dst, vecX src1, vecX src2) %{ 11439 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11440 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 11441 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11442 match(Set dst (VectorMaskCmp src1 src2)); 11443 format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed4F" %} 11444 ins_encode %{ 11445 int vector_len = 0; 11446 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 11447 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11448 %} 11449 ins_pipe( pipe_slow ); 11450 %} 11451 11452 instruct vcmpge8F(vecY dst, vecY src1, vecY src2) %{ 11453 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 11454 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 11455 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11456 match(Set dst (VectorMaskCmp src1 src2)); 11457 format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed8F" %} 11458 ins_encode %{ 11459 int vector_len = 1; 11460 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 11461 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11462 %} 11463 ins_pipe( pipe_slow ); 11464 %} 11465 11466 instruct vcmpge16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11467 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 11468 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 11469 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11470 match(Set dst (VectorMaskCmp src1 src2)); 11471 effect(TEMP dst, TEMP scratch); 11472 format %{ "vcmpgeps k2,$src1,$src2\n\t" 11473 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16F" %} 11474 ins_encode %{ 11475 int vector_len = 2; 11476 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 11477 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11478 KRegister mask = k0; // The comparison itself is not being masked. 
11479 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11480 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11481 %} 11482 ins_pipe( pipe_slow ); 11483 %} 11484 11485 instruct vcmple2F(vecD dst, vecD src1, vecD src2) %{ 11486 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11487 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 11488 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11489 match(Set dst (VectorMaskCmp src1 src2)); 11490 format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed2F" %} 11491 ins_encode %{ 11492 int vector_len = 0; 11493 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 11494 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11495 %} 11496 ins_pipe( pipe_slow ); 11497 %} 11498 11499 instruct vcmple4F(vecX dst, vecX src1, vecX src2) %{ 11500 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11501 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 11502 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11503 match(Set dst (VectorMaskCmp src1 src2)); 11504 format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed4F" %} 11505 ins_encode %{ 11506 int vector_len = 0; 11507 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 11508 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11509 %} 11510 ins_pipe( pipe_slow ); 11511 %} 11512 11513 instruct vcmple8F(vecY dst, vecY src1, vecY src2) %{ 11514 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 11515 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 11516 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11517 match(Set dst (VectorMaskCmp src1 src2)); 11518 format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed8F" %} 11519 ins_encode %{ 11520 int vector_len = 1; 11521 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 11522 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11523 %} 11524 ins_pipe( pipe_slow ); 11525 %} 11526 11527 instruct vcmple16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11528 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 11529 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 11530 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11531 match(Set dst (VectorMaskCmp src1 src2)); 11532 effect(TEMP dst, TEMP scratch); 11533 format %{ "vcmpleps k2,$src1,$src2\n\t" 11534 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16F" %} 11535 ins_encode %{ 11536 int vector_len = 2; 11537 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 11538 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11539 KRegister mask = k0; // The comparison itself is not being masked. 
11540 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11541 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11542 %} 11543 ins_pipe( pipe_slow ); 11544 %} 11545 11546 instruct vcmpne2F(vecD dst, vecD src1, vecD src2) %{ 11547 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11548 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 11549 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11550 match(Set dst (VectorMaskCmp src1 src2)); 11551 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed2F" %} 11552 ins_encode %{ 11553 int vector_len = 0; 11554 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 11555 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 11556 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11557 %} 11558 ins_pipe( pipe_slow ); 11559 %} 11560 11561 instruct vcmpne4F(vecX dst, vecX src1, vecX src2) %{ 11562 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11563 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 11564 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11565 match(Set dst (VectorMaskCmp src1 src2)); 11566 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed4F" %} 11567 ins_encode %{ 11568 int vector_len = 0; 11569 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 11570 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 11571 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11572 %} 11573 ins_pipe( pipe_slow ); 11574 %} 11575 11576 instruct vcmpne8F(vecY dst, vecY src1, vecY src2) %{ 11577 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 11578 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 11579 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11580 match(Set dst (VectorMaskCmp src1 src2)); 11581 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed8F" %} 11582 ins_encode %{ 11583 int vector_len = 1; 11584 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 11585 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 11586 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11587 %} 11588 ins_pipe( pipe_slow ); 11589 %} 11590 11591 instruct vcmpne16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11592 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 11593 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 11594 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11595 match(Set dst (VectorMaskCmp src1 src2)); 11596 effect(TEMP dst, TEMP scratch); 11597 format %{ "vcmpneps k2,$src1,$src2\n\t" 11598 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed16F" %} 11599 ins_encode %{ 11600 int vector_len = 2; 11601 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 11602 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 11603 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11604 KRegister mask = k0; // The comparison itself is not being masked. 
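    // NEQ_UQ is the unordered predicate: it reports true whenever either input is
    // NaN, matching Java's != (e.g. Float.NaN != Float.NaN is true), whereas the
    // ordered EQ_OQ used by the other compares yields false for any NaN operand.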
11605 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11606 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11607 %} 11608 ins_pipe( pipe_slow ); 11609 %} 11610 11611 instruct vcmpeq1D(vecD dst, vecD src1, vecD src2) %{ 11612 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 11613 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11614 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11615 match(Set dst (VectorMaskCmp src1 src2)); 11616 format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed1D" %} 11617 ins_encode %{ 11618 int vector_len = 0; 11619 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 11620 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11621 %} 11622 ins_pipe( pipe_slow ); 11623 %} 11624 11625 instruct vcmpeq2D(vecX dst, vecX src1, vecX src2) %{ 11626 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11627 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11628 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11629 match(Set dst (VectorMaskCmp src1 src2)); 11630 format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed2D" %} 11631 ins_encode %{ 11632 int vector_len = 0; 11633 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 11634 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11635 %} 11636 ins_pipe( pipe_slow ); 11637 %} 11638 11639 instruct vcmpeq4D(vecY dst, vecY src1, vecY src2) %{ 11640 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11641 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11642 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11643 match(Set dst (VectorMaskCmp src1 src2)); 11644 format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed4D" %} 11645 ins_encode %{ 11646 int vector_len = 1; 11647 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 11648 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11649 %} 11650 ins_pipe( pipe_slow ); 11651 %} 11652 11653 instruct vcmpeq8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11654 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 11655 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11656 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11657 match(Set dst (VectorMaskCmp src1 src2)); 11658 effect(TEMP dst, TEMP scratch); 11659 format %{ "vcmpeqpd k2,$src1,$src2\n\t" 11660 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8D" %} 11661 ins_encode %{ 11662 int vector_len = 2; 11663 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 11664 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11665 KRegister mask = k0; // The comparison itself is not being masked. 
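    // The packed-double compares mirror the float sequence above: evcmppd sets one
    // bit per double lane in ktmp, and the zero-masked move then rebuilds the boolean
    // vector from vector_all_bits_set(), which (judging from the 0xFF.. mask shown in
    // the format string) addresses an all-ones constant.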
11666 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11667 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11668 %} 11669 ins_pipe( pipe_slow ); 11670 %} 11671 11672 instruct vcmplt1D(vecD dst, vecD src1, vecD src2) %{ 11673 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 11674 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 11675 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11676 match(Set dst (VectorMaskCmp src1 src2)); 11677 format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed1D" %} 11678 ins_encode %{ 11679 int vector_len = 0; 11680 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 11681 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11682 %} 11683 ins_pipe( pipe_slow ); 11684 %} 11685 11686 instruct vcmplt2D(vecX dst, vecX src1, vecX src2) %{ 11687 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11688 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 11689 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11690 match(Set dst (VectorMaskCmp src1 src2)); 11691 format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed2D" %} 11692 ins_encode %{ 11693 int vector_len = 0; 11694 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 11695 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11696 %} 11697 ins_pipe( pipe_slow ); 11698 %} 11699 11700 instruct vcmplt4D(vecY dst, vecY src1, vecY src2) %{ 11701 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11702 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 11703 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11704 match(Set dst (VectorMaskCmp src1 src2)); 11705 format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed4D" %} 11706 ins_encode %{ 11707 int vector_len = 1; 11708 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 11709 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11710 %} 11711 ins_pipe( pipe_slow ); 11712 %} 11713 11714 instruct vcmplt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11715 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 11716 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 11717 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11718 match(Set dst (VectorMaskCmp src1 src2)); 11719 effect(TEMP dst, TEMP scratch); 11720 format %{ "vcmpltpd k2,$src1,$src2\n\t" 11721 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8D" %} 11722 ins_encode %{ 11723 int vector_len = 2; 11724 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 11725 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11726 KRegister mask = k0; // The comparison itself is not being masked. 
11727 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11728 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11729 %} 11730 ins_pipe( pipe_slow ); 11731 %} 11732 11733 instruct vcmpgt1D(vecD dst, vecD src1, vecD src2) %{ 11734 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 11735 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 11736 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11737 match(Set dst (VectorMaskCmp src1 src2)); 11738 format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed1D" %} 11739 ins_encode %{ 11740 int vector_len = 0; 11741 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 11742 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11743 %} 11744 ins_pipe( pipe_slow ); 11745 %} 11746 11747 instruct vcmpgt2D(vecX dst, vecX src1, vecX src2) %{ 11748 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11749 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 11750 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11751 match(Set dst (VectorMaskCmp src1 src2)); 11752 format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed2D" %} 11753 ins_encode %{ 11754 int vector_len = 0; 11755 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 11756 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11757 %} 11758 ins_pipe( pipe_slow ); 11759 %} 11760 11761 instruct vcmpgt4D(vecY dst, vecY src1, vecY src2) %{ 11762 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11763 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 11764 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11765 match(Set dst (VectorMaskCmp src1 src2)); 11766 format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed4D" %} 11767 ins_encode %{ 11768 int vector_len = 1; 11769 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 11770 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11771 %} 11772 ins_pipe( pipe_slow ); 11773 %} 11774 11775 instruct vcmpgt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11776 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 11777 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 11778 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11779 match(Set dst (VectorMaskCmp src1 src2)); 11780 effect(TEMP dst, TEMP scratch); 11781 format %{ "vcmpgtpd k2,$src1,$src2\n\t" 11782 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8D" %} 11783 ins_encode %{ 11784 int vector_len = 2; 11785 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 11786 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11787 KRegister mask = k0; // The comparison itself is not being masked. 
11788 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11789 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11790 %} 11791 ins_pipe( pipe_slow ); 11792 %} 11793 11794 instruct vcmpge1D(vecD dst, vecD src1, vecD src2) %{ 11795 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 11796 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 11797 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11798 match(Set dst (VectorMaskCmp src1 src2)); 11799 format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed1D" %} 11800 ins_encode %{ 11801 int vector_len = 0; 11802 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 11803 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11804 %} 11805 ins_pipe( pipe_slow ); 11806 %} 11807 11808 instruct vcmpge2D(vecX dst, vecX src1, vecX src2) %{ 11809 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11810 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 11811 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11812 match(Set dst (VectorMaskCmp src1 src2)); 11813 format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed2D" %} 11814 ins_encode %{ 11815 int vector_len = 0; 11816 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 11817 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11818 %} 11819 ins_pipe( pipe_slow ); 11820 %} 11821 11822 instruct vcmpge4D(vecY dst, vecY src1, vecY src2) %{ 11823 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11824 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 11825 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11826 match(Set dst (VectorMaskCmp src1 src2)); 11827 format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed4D" %} 11828 ins_encode %{ 11829 int vector_len = 1; 11830 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 11831 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11832 %} 11833 ins_pipe( pipe_slow ); 11834 %} 11835 11836 instruct vcmpge8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11837 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 11838 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 11839 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11840 match(Set dst (VectorMaskCmp src1 src2)); 11841 effect(TEMP dst, TEMP scratch); 11842 format %{ "vcmpgepd k2,$src1,$src2\n\t" 11843 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8D" %} 11844 ins_encode %{ 11845 int vector_len = 2; 11846 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 11847 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11848 KRegister mask = k0; // The comparison itself is not being masked. 
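// GE_OQ likewise yields false on unordered (NaN) inputs without
// signaling, as JLS 15.20.1 requires of the numerical comparison
// operators; e.g. (NaN >= NaN) is false, so that dst lane stays zero.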
11849 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11850 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11851 %} 11852 ins_pipe( pipe_slow ); 11853 %} 11854 11855 instruct vcmple1D(vecD dst, vecD src1, vecD src2) %{ 11856 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 11857 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 11858 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11859 match(Set dst (VectorMaskCmp src1 src2)); 11860 format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed1D" %} 11861 ins_encode %{ 11862 int vector_len = 0; 11863 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 11864 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11865 %} 11866 ins_pipe( pipe_slow ); 11867 %} 11868 11869 instruct vcmple2D(vecX dst, vecX src1, vecX src2) %{ 11870 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11871 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 11872 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11873 match(Set dst (VectorMaskCmp src1 src2)); 11874 format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed2D" %} 11875 ins_encode %{ 11876 int vector_len = 0; 11877 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 11878 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11879 %} 11880 ins_pipe( pipe_slow ); 11881 %} 11882 11883 instruct vcmple4D(vecY dst, vecY src1, vecY src2) %{ 11884 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11885 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 11886 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11887 match(Set dst (VectorMaskCmp src1 src2)); 11888 format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed4D" %} 11889 ins_encode %{ 11890 int vector_len = 1; 11891 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 11892 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11893 %} 11894 ins_pipe( pipe_slow ); 11895 %} 11896 11897 instruct vcmple8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11898 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 11899 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 11900 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11901 match(Set dst (VectorMaskCmp src1 src2)); 11902 effect(TEMP dst, TEMP scratch); 11903 format %{ "vcmplepd k2,$src1,$src2\n\t" 11904 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8D" %} 11905 ins_encode %{ 11906 int vector_len = 2; 11907 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 11908 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11909 KRegister mask = k0; // The comparison itself is not being masked. 
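// Note the contrast with the integer rules later in this section: the
// vcmppd immediate encodes le directly (LE_OQ), whereas the AVX/AVX2
// integer forms below only have eq/gt compares and must synthesize le
// as NOT(src1 > src2) using an extra vpxor against all-ones.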
11910 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11911 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11912 %} 11913 ins_pipe( pipe_slow ); 11914 %} 11915 11916 instruct vcmpne1D(vecD dst, vecD src1, vecD src2) %{ 11917 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 11918 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 11919 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11920 match(Set dst (VectorMaskCmp src1 src2)); 11921 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed1D" %} 11922 ins_encode %{ 11923 int vector_len = 0; 11924 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 11925 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 11926 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11927 %} 11928 ins_pipe( pipe_slow ); 11929 %} 11930 11931 instruct vcmpne2D(vecX dst, vecX src1, vecX src2) %{ 11932 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11933 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 11934 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11935 match(Set dst (VectorMaskCmp src1 src2)); 11936 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed2D" %} 11937 ins_encode %{ 11938 int vector_len = 0; 11939 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 11940 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 11941 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11942 %} 11943 ins_pipe( pipe_slow ); 11944 %} 11945 11946 instruct vcmpne4D(vecY dst, vecY src1, vecY src2) %{ 11947 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11948 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 11949 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11950 match(Set dst (VectorMaskCmp src1 src2)); 11951 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed4D" %} 11952 ins_encode %{ 11953 int vector_len = 1; 11954 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 11955 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 11956 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11957 %} 11958 ins_pipe( pipe_slow ); 11959 %} 11960 11961 instruct vcmpne8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 11962 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 11963 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 11964 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11965 match(Set dst (VectorMaskCmp src1 src2)); 11966 effect(TEMP dst, TEMP scratch); 11967 format %{ "vcmpnepd k2,$src1,$src2\n\t" 11968 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed8D" %} 11969 ins_encode %{ 11970 int vector_len = 2; 11971 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 11972 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 11973 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 11974 KRegister mask = k0; // The comparison itself is not being masked. 
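// NEQ_UQ is the unordered, non-signaling not-equal predicate: unordered
// (NaN) inputs compare true, per the JLS 15.21.1 note above.
// Illustrative example (values made up): a NaN lane in src1 is not-equal
// to any src2 value, so that dst lane is filled with all ones.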
11975 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 11976 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 11977 %} 11978 ins_pipe( pipe_slow ); 11979 %} 11980 11981 instruct vcmpeq2I(vecD dst, vecD src1, vecD src2) %{ 11982 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 11983 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11984 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11985 match(Set dst (VectorMaskCmp src1 src2)); 11986 format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed2I" %} 11987 ins_encode %{ 11988 int vector_len = 0; 11989 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11990 %} 11991 ins_pipe( pipe_slow ); 11992 %} 11993 11994 instruct vcmpeq4I(vecX dst, vecX src1, vecX src2) %{ 11995 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 11996 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 11997 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11998 match(Set dst (VectorMaskCmp src1 src2)); 11999 format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed4I" %} 12000 ins_encode %{ 12001 int vector_len = 0; 12002 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12003 %} 12004 ins_pipe( pipe_slow ); 12005 %} 12006 12007 instruct vcmpeq8I(vecY dst, vecY src1, vecY src2) %{ 12008 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12009 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12010 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12011 match(Set dst (VectorMaskCmp src1 src2)); 12012 format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed8I" %} 12013 ins_encode %{ 12014 int vector_len = 1; 12015 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12016 %} 12017 ins_pipe( pipe_slow ); 12018 %} 12019 12020 instruct vcmpeq16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12021 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12022 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12023 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12024 match(Set dst (VectorMaskCmp src1 src2)); 12025 effect(TEMP dst, TEMP scratch); 12026 format %{ "vpcmpeqd k2,$src1,$src2\n\t" 12027 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16I" %} 12028 ins_encode %{ 12029 int vector_len = 2; 12030 Assembler::ComparisonPredicate cmp = Assembler::eq; 12031 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12032 KRegister mask = k0; // The comparison itself is not being masked. 12033 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12034 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12035 %} 12036 ins_pipe( pipe_slow ); 12037 %} 12038 12039 instruct vcmplt2I(vecD dst, vecD src1, vecD src2) %{ 12040 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12041 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12042 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12043 match(Set dst (VectorMaskCmp src1 src2)); 12044 format %{ "vpcmpgtd $dst,$src2,$src1\t! 
cmplt packed2I" %} 12045 ins_encode %{ 12046 int vector_len = 0; 12047 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12048 %} 12049 ins_pipe( pipe_slow ); 12050 %} 12051 12052 instruct vcmplt4I(vecX dst, vecX src1, vecX src2) %{ 12053 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12054 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12055 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12056 match(Set dst (VectorMaskCmp src1 src2)); 12057 format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed4I" %} 12058 ins_encode %{ 12059 int vector_len = 0; 12060 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12061 %} 12062 ins_pipe( pipe_slow ); 12063 %} 12064 12065 instruct vcmplt8I(vecY dst, vecY src1, vecY src2) %{ 12066 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12067 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12068 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12069 match(Set dst (VectorMaskCmp src1 src2)); 12070 format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed8I" %} 12071 ins_encode %{ 12072 int vector_len = 1; 12073 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12074 %} 12075 ins_pipe( pipe_slow ); 12076 %} 12077 12078 instruct vcmplt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12079 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12080 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12081 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12082 match(Set dst (VectorMaskCmp src1 src2)); 12083 effect(TEMP dst, TEMP scratch); 12084 format %{ "vpcmpnled k2,$src1,$src2\n\t" 12085 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16I" %} 12086 ins_encode %{ 12087 int vector_len = 2; 12088 Assembler::ComparisonPredicate cmp = Assembler::lt; 12089 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12090 KRegister mask = k0; // The comparison itself is not being masked. 12091 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12092 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12093 %} 12094 ins_pipe( pipe_slow ); 12095 %} 12096 12097 instruct vcmpgt2I(vecD dst, vecD src1, vecD src2) %{ 12098 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12099 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12100 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12101 match(Set dst (VectorMaskCmp src1 src2)); 12102 format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed2I" %} 12103 ins_encode %{ 12104 int vector_len = 0; 12105 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12106 %} 12107 ins_pipe( pipe_slow ); 12108 %} 12109 12110 instruct vcmpgt4I(vecX dst, vecX src1, vecX src2) %{ 12111 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12112 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12113 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12114 match(Set dst (VectorMaskCmp src1 src2)); 12115 format %{ "vpcmpgtd $dst,$src1,$src2\t! 
cmpgt packed4I" %} 12116 ins_encode %{ 12117 int vector_len = 0; 12118 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12119 %} 12120 ins_pipe( pipe_slow ); 12121 %} 12122 12123 instruct vcmpgt8I(vecY dst, vecY src1, vecY src2) %{ 12124 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12125 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12126 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12127 match(Set dst (VectorMaskCmp src1 src2)); 12128 format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed8I" %} 12129 ins_encode %{ 12130 int vector_len = 1; 12131 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12132 %} 12133 ins_pipe( pipe_slow ); 12134 %} 12135 12136 instruct vcmpgt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12137 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12138 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12139 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12140 match(Set dst (VectorMaskCmp src1 src2)); 12141 effect(TEMP dst, TEMP scratch); 12142 format %{ "vpcmpnled k2,$src1,$src2\n\t" 12143 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16I" %} 12144 ins_encode %{ 12145 int vector_len = 2; 12146 Assembler::ComparisonPredicate cmp = Assembler::nle; 12147 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12148 KRegister mask = k0; // The comparison itself is not being masked. 12149 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12150 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12151 %} 12152 ins_pipe( pipe_slow ); 12153 %} 12154 12155 instruct vcmpge2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12156 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12157 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12158 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12159 match(Set dst (VectorMaskCmp src1 src2)); 12160 effect(TEMP scratch); 12161 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 12162 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2I" %} 12163 ins_encode %{ 12164 int vector_len = 0; 12165 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12166 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12167 %} 12168 ins_pipe( pipe_slow ); 12169 %} 12170 12171 instruct vcmpge4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12172 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12173 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12174 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12175 match(Set dst (VectorMaskCmp src1 src2)); 12176 effect(TEMP scratch); 12177 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 12178 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed4I" %} 12179 ins_encode %{ 12180 int vector_len = 0; 12181 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12182 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12183 %} 12184 ins_pipe( pipe_slow ); 12185 %} 12186 12187 instruct vcmpge8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12188 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12189 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12190 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12191 match(Set dst (VectorMaskCmp src1 src2)); 12192 effect(TEMP scratch); 12193 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 12194 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8I" %} 12195 ins_encode %{ 12196 int vector_len = 1; 12197 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12198 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12199 %} 12200 ins_pipe( pipe_slow ); 12201 %} 12202 12203 instruct vcmpge16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12204 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12205 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12206 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12207 match(Set dst (VectorMaskCmp src1 src2)); 12208 effect(TEMP dst, TEMP scratch); 12209 format %{ "vpcmpnltd k2,$src1,$src2\n\t" 12210 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16I" %} 12211 ins_encode %{ 12212 int vector_len = 2; 12213 Assembler::ComparisonPredicate cmp = Assembler::nlt; 12214 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12215 KRegister mask = k0; // The comparison itself is not being masked. 12216 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12217 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12218 %} 12219 ins_pipe( pipe_slow ); 12220 %} 12221 12222 instruct vcmple2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12223 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12224 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12225 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12226 match(Set dst (VectorMaskCmp src1 src2)); 12227 effect(TEMP scratch); 12228 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 12229 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2I" %} 12230 ins_encode %{ 12231 int vector_len = 0; 12232 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12233 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12234 %} 12235 ins_pipe( pipe_slow ); 12236 %} 12237 12238 instruct vcmple4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12239 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12240 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12241 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12242 match(Set dst (VectorMaskCmp src1 src2)); 12243 effect(TEMP scratch); 12244 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 12245 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed4I" %} 12246 ins_encode %{ 12247 int vector_len = 0; 12248 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12249 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12250 %} 12251 ins_pipe( pipe_slow ); 12252 %} 12253 12254 instruct vcmple8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12255 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12256 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12257 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12258 match(Set dst (VectorMaskCmp src1 src2)); 12259 effect(TEMP scratch); 12260 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 12261 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8I" %} 12262 ins_encode %{ 12263 int vector_len = 1; 12264 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12265 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12266 %} 12267 ins_pipe( pipe_slow ); 12268 %} 12269 12270 instruct vcmple16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12271 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12272 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12273 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12274 match(Set dst (VectorMaskCmp src1 src2)); 12275 effect(TEMP dst, TEMP scratch); 12276 format %{ "vpcmpled k2,$src1,$src2\n\t" 12277 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16I" %} 12278 ins_encode %{ 12279 int vector_len = 2; 12280 Assembler::ComparisonPredicate cmp = Assembler::le; 12281 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12282 KRegister mask = k0; // The comparison itself is not being masked. 12283 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12284 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12285 %} 12286 ins_pipe( pipe_slow ); 12287 %} 12288 12289 instruct vcmpne2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12290 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 12291 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12292 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12293 match(Set dst (VectorMaskCmp src1 src2)); 12294 effect(TEMP scratch); 12295 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 12296 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2I" %} 12297 ins_encode %{ 12298 int vector_len = 0; 12299 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12300 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12301 %} 12302 ins_pipe( pipe_slow ); 12303 %} 12304 12305 instruct vcmpne4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12306 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12307 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12308 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12309 match(Set dst (VectorMaskCmp src1 src2)); 12310 effect(TEMP scratch); 12311 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 12312 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed4I" %} 12313 ins_encode %{ 12314 int vector_len = 0; 12315 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12316 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12317 %} 12318 ins_pipe( pipe_slow ); 12319 %} 12320 12321 instruct vcmpne8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12322 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 12323 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12324 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12325 match(Set dst (VectorMaskCmp src1 src2)); 12326 effect(TEMP scratch); 12327 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 12328 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8I" %} 12329 ins_encode %{ 12330 int vector_len = 1; 12331 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12332 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12333 %} 12334 ins_pipe( pipe_slow ); 12335 %} 12336 12337 instruct vcmpne16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12338 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 12339 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12340 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 12341 match(Set dst (VectorMaskCmp src1 src2)); 12342 effect(TEMP dst, TEMP scratch); 12343 format %{ "vpcmpneqd k2,$src1,$src2\n\t" 12344 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed16I" %} 12345 ins_encode %{ 12346 int vector_len = 2; 12347 Assembler::ComparisonPredicate cmp = Assembler::neq; 12348 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12349 KRegister mask = k0; // The comparison itself is not being masked. 12350 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12351 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12352 %} 12353 ins_pipe( pipe_slow ); 12354 %} 12355 12356 instruct vcmpeq8B(vecD dst, vecD src1, vecD src2) %{ 12357 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12358 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12359 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12360 match(Set dst (VectorMaskCmp src1 src2)); 12361 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed8B" %} 12362 ins_encode %{ 12363 int vector_len = 0; 12364 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12365 %} 12366 ins_pipe( pipe_slow ); 12367 %} 12368 12369 instruct vcmpeq16B(vecX dst, vecX src1, vecX src2) %{ 12370 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12371 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12372 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12373 match(Set dst (VectorMaskCmp src1 src2)); 12374 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! 
cmpeq packed16B" %} 12375 ins_encode %{ 12376 int vector_len = 0; 12377 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12378 %} 12379 ins_pipe( pipe_slow ); 12380 %} 12381 12382 instruct vcmpeq32B(vecY dst, vecY src1, vecY src2) %{ 12383 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12384 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12385 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12386 match(Set dst (VectorMaskCmp src1 src2)); 12387 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed32B" %} 12388 ins_encode %{ 12389 int vector_len = 1; 12390 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12391 %} 12392 ins_pipe( pipe_slow ); 12393 %} 12394 12395 instruct vcmpeq64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12396 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 12397 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12398 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12399 match(Set dst (VectorMaskCmp src1 src2)); 12400 effect(TEMP dst, TEMP scratch); 12401 format %{ "vpcmpeqb k2,$src1,$src2\n\t" 12402 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed64B" %} 12403 ins_encode %{ 12404 int vector_len = 2; 12405 Assembler::ComparisonPredicate cmp = Assembler::eq; 12406 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12407 KRegister mask = k0; // The comparison itself is not being masked. 12408 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12409 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12410 %} 12411 ins_pipe( pipe_slow ); 12412 %} 12413 12414 instruct vcmplt8B(vecD dst, vecD src1, vecD src2) %{ 12415 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12416 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12417 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12418 match(Set dst (VectorMaskCmp src1 src2)); 12419 format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed8B" %} 12420 ins_encode %{ 12421 int vector_len = 0; 12422 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12423 %} 12424 ins_pipe( pipe_slow ); 12425 %} 12426 12427 instruct vcmplt16B(vecX dst, vecX src1, vecX src2) %{ 12428 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12429 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12430 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12431 match(Set dst (VectorMaskCmp src1 src2)); 12432 format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed16B" %} 12433 ins_encode %{ 12434 int vector_len = 0; 12435 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12436 %} 12437 ins_pipe( pipe_slow ); 12438 %} 12439 12440 instruct vcmplt32B(vecY dst, vecY src1, vecY src2) %{ 12441 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12442 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12443 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12444 match(Set dst (VectorMaskCmp src1 src2)); 12445 format %{ "vpcmpgtb $dst,$src2,$src1\t! 
cmplt packed32B" %} 12446 ins_encode %{ 12447 int vector_len = 1; 12448 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12449 %} 12450 ins_pipe( pipe_slow ); 12451 %} 12452 12453 instruct vcmplt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12454 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 12455 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12456 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12457 match(Set dst (VectorMaskCmp src1 src2)); 12458 effect(TEMP dst, TEMP scratch); 12459 format %{ "vpcmpnleb k2,$src1,$src2\n\t" 12460 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed64B" %} 12461 ins_encode %{ 12462 int vector_len = 2; 12463 Assembler::ComparisonPredicate cmp = Assembler::lt; 12464 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12465 KRegister mask = k0; // The comparison itself is not being masked. 12466 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12467 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12468 %} 12469 ins_pipe( pipe_slow ); 12470 %} 12471 12472 instruct vcmpgt8B(vecD dst, vecD src1, vecD src2) %{ 12473 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12474 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12475 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12476 match(Set dst (VectorMaskCmp src1 src2)); 12477 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed8B" %} 12478 ins_encode %{ 12479 int vector_len = 0; 12480 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12481 %} 12482 ins_pipe( pipe_slow ); 12483 %} 12484 12485 instruct vcmpgt16B(vecX dst, vecX src1, vecX src2) %{ 12486 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12487 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12488 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12489 match(Set dst (VectorMaskCmp src1 src2)); 12490 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed16B" %} 12491 ins_encode %{ 12492 int vector_len = 0; 12493 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12494 %} 12495 ins_pipe( pipe_slow ); 12496 %} 12497 12498 instruct vcmpgt32B(vecY dst, vecY src1, vecY src2) %{ 12499 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12500 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12501 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12502 match(Set dst (VectorMaskCmp src1 src2)); 12503 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed32B" %} 12504 ins_encode %{ 12505 int vector_len = 1; 12506 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12507 %} 12508 ins_pipe( pipe_slow ); 12509 %} 12510 12511 instruct vcmpgt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12512 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 12513 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12514 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12515 match(Set dst (VectorMaskCmp src1 src2)); 12516 effect(TEMP dst, TEMP scratch); 12517 format %{ "vpcmpnleb k2,$src1,$src2\n\t" 12518 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpgt packed64B" %} 12519 ins_encode %{ 12520 int vector_len = 2; 12521 Assembler::ComparisonPredicate cmp = Assembler::nle; 12522 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12523 KRegister mask = k0; // The comparison itself is not being masked. 12524 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12525 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12526 %} 12527 ins_pipe( pipe_slow ); 12528 %} 12529 12530 instruct vcmpge8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12531 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12532 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12533 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12534 match(Set dst (VectorMaskCmp src1 src2)); 12535 effect(TEMP scratch); 12536 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 12537 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8B" %} 12538 ins_encode %{ 12539 int vector_len = 0; 12540 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12541 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12542 %} 12543 ins_pipe( pipe_slow ); 12544 %} 12545 12546 instruct vcmpge16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12547 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12548 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12549 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12550 match(Set dst (VectorMaskCmp src1 src2)); 12551 effect(TEMP scratch); 12552 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 12553 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16B" %} 12554 ins_encode %{ 12555 int vector_len = 0; 12556 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12557 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12558 %} 12559 ins_pipe( pipe_slow ); 12560 %} 12561 12562 instruct vcmpge32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12563 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12564 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12565 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12566 match(Set dst (VectorMaskCmp src1 src2)); 12567 effect(TEMP scratch); 12568 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 12569 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed32B" %} 12570 ins_encode %{ 12571 int vector_len = 1; 12572 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12573 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12574 %} 12575 ins_pipe( pipe_slow ); 12576 %} 12577 12578 instruct vcmpge64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12579 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 12580 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12581 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12582 match(Set dst (VectorMaskCmp src1 src2)); 12583 effect(TEMP dst, TEMP scratch); 12584 format %{ "vpcmpnltb k2,$src1,$src2\n\t" 12585 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpge packed64B" %} 12586 ins_encode %{ 12587 int vector_len = 2; 12588 Assembler::ComparisonPredicate cmp = Assembler::nlt; 12589 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12590 KRegister mask = k0; // The comparison itself is not being masked. 12591 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12592 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12593 %} 12594 ins_pipe( pipe_slow ); 12595 %} 12596 12597 instruct vcmple8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12598 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12599 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12600 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12601 match(Set dst (VectorMaskCmp src1 src2)); 12602 effect(TEMP scratch); 12603 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 12604 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8B" %} 12605 ins_encode %{ 12606 int vector_len = 0; 12607 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12608 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12609 %} 12610 ins_pipe( pipe_slow ); 12611 %} 12612 12613 instruct vcmple16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12614 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12615 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12616 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12617 match(Set dst (VectorMaskCmp src1 src2)); 12618 effect(TEMP scratch); 12619 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 12620 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16B" %} 12621 ins_encode %{ 12622 int vector_len = 0; 12623 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12624 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12625 %} 12626 ins_pipe( pipe_slow ); 12627 %} 12628 12629 instruct vcmple32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12630 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12631 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12632 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12633 match(Set dst (VectorMaskCmp src1 src2)); 12634 effect(TEMP scratch); 12635 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 12636 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed32B" %} 12637 ins_encode %{ 12638 int vector_len = 1; 12639 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12640 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12641 %} 12642 ins_pipe( pipe_slow ); 12643 %} 12644 12645 instruct vcmple64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12646 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 12647 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12648 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12649 match(Set dst (VectorMaskCmp src1 src2)); 12650 effect(TEMP dst, TEMP scratch); 12651 format %{ "vpcmpleb k2,$src1,$src2\n\t" 12652 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmple packed64B" %} 12653 ins_encode %{ 12654 int vector_len = 2; 12655 Assembler::ComparisonPredicate cmp = Assembler::le; 12656 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12657 KRegister mask = k0; // The comparison itself is not being masked. 12658 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12659 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12660 %} 12661 ins_pipe( pipe_slow ); 12662 %} 12663 12664 instruct vcmpne8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12665 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12666 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12667 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12668 match(Set dst (VectorMaskCmp src1 src2)); 12669 effect(TEMP scratch); 12670 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 12671 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8B" %} 12672 ins_encode %{ 12673 int vector_len = 0; 12674 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12675 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12676 %} 12677 ins_pipe( pipe_slow ); 12678 %} 12679 12680 instruct vcmpne16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12681 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 12682 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12683 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12684 match(Set dst (VectorMaskCmp src1 src2)); 12685 effect(TEMP scratch); 12686 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 12687 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16B" %} 12688 ins_encode %{ 12689 int vector_len = 0; 12690 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12691 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12692 %} 12693 ins_pipe( pipe_slow ); 12694 %} 12695 12696 instruct vcmpne32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12697 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 12698 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12699 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12700 match(Set dst (VectorMaskCmp src1 src2)); 12701 effect(TEMP scratch); 12702 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 12703 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed32B" %} 12704 ins_encode %{ 12705 int vector_len = 1; 12706 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12707 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12708 %} 12709 ins_pipe( pipe_slow ); 12710 %} 12711 12712 instruct vcmpne64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12713 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 12714 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 12715 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 12716 match(Set dst (VectorMaskCmp src1 src2)); 12717 effect(TEMP dst, TEMP scratch); 12718 format %{ "vpcmpneqb k2,$src1,$src2\n\t" 12719 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpneq packed64B" %} 12720 ins_encode %{ 12721 int vector_len = 2; 12722 Assembler::ComparisonPredicate cmp = Assembler::neq; 12723 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12724 KRegister mask = k0; // The comparison itself is not being masked. 12725 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12726 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12727 %} 12728 ins_pipe( pipe_slow ); 12729 %} 12730 12731 instruct vcmpeq4S(vecD dst, vecD src1, vecD src2) %{ 12732 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12733 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12734 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12735 match(Set dst (VectorMaskCmp src1 src2)); 12736 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed4S" %} 12737 ins_encode %{ 12738 int vector_len = 0; 12739 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12740 %} 12741 ins_pipe( pipe_slow ); 12742 %} 12743 12744 instruct vcmpeq8S(vecX dst, vecX src1, vecX src2) %{ 12745 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12746 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12747 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12748 match(Set dst (VectorMaskCmp src1 src2)); 12749 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed8S" %} 12750 ins_encode %{ 12751 int vector_len = 0; 12752 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12753 %} 12754 ins_pipe( pipe_slow ); 12755 %} 12756 12757 instruct vcmpeq16S(vecY dst, vecY src1, vecY src2) %{ 12758 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 12759 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12760 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12761 match(Set dst (VectorMaskCmp src1 src2)); 12762 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed16S" %} 12763 ins_encode %{ 12764 int vector_len = 1; 12765 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12766 %} 12767 ins_pipe( pipe_slow ); 12768 %} 12769 12770 instruct vcmpeq32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12771 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 12772 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 12773 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12774 match(Set dst (VectorMaskCmp src1 src2)); 12775 effect(TEMP dst, TEMP scratch); 12776 format %{ "vpcmpeqw k2,$src1,$src2\n\t" 12777 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed32S" %} 12778 ins_encode %{ 12779 int vector_len = 2; 12780 Assembler::ComparisonPredicate cmp = Assembler::eq; 12781 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12782 KRegister mask = k0; // The comparison itself is not being masked. 
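// Same k-register idiom as the packed-double rules above, specialized
// to 16-bit lanes: the integer predicate (Assembler::eq here; lt, le,
// neq, nlt and nle elsewhere in this section) selects the EVEX imm8,
// evpcmpw compares words into ktmp, and evmovdquw expands the mask by
// loading all-ones words under {z} masking from vector_all_bits_set().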
12783 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12784 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12785 %} 12786 ins_pipe( pipe_slow ); 12787 %} 12788 12789 instruct vcmplt4S(vecD dst, vecD src1, vecD src2) %{ 12790 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12791 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12792 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12793 match(Set dst (VectorMaskCmp src1 src2)); 12794 format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed4S" %} 12795 ins_encode %{ 12796 int vector_len = 0; 12797 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12798 %} 12799 ins_pipe( pipe_slow ); 12800 %} 12801 12802 instruct vcmplt8S(vecX dst, vecX src1, vecX src2) %{ 12803 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12804 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12805 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12806 match(Set dst (VectorMaskCmp src1 src2)); 12807 format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed8S" %} 12808 ins_encode %{ 12809 int vector_len = 0; 12810 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12811 %} 12812 ins_pipe( pipe_slow ); 12813 %} 12814 12815 instruct vcmplt16S(vecY dst, vecY src1, vecY src2) %{ 12816 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 12817 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12818 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12819 match(Set dst (VectorMaskCmp src1 src2)); 12820 format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed16S" %} 12821 ins_encode %{ 12822 int vector_len = 1; 12823 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12824 %} 12825 ins_pipe( pipe_slow ); 12826 %} 12827 12828 instruct vcmplt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12829 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 12830 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 12831 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12832 match(Set dst (VectorMaskCmp src1 src2)); 12833 effect(TEMP dst, TEMP scratch); 12834 format %{ "vpcmpltw k2,$src1,$src2\n\t" 12835 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed32S" %} 12836 ins_encode %{ 12837 int vector_len = 2; 12838 Assembler::ComparisonPredicate cmp = Assembler::lt; 12839 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12840 KRegister mask = k0; // The comparison itself is not being masked. 12841 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12842 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12843 %} 12844 ins_pipe( pipe_slow ); 12845 %} 12846 12847 instruct vcmpgt4S(vecD dst, vecD src1, vecD src2) %{ 12848 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12849 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12850 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12851 match(Set dst (VectorMaskCmp src1 src2)); 12852 format %{ "vpcmpgtw $dst,$src1,$src2\t!
cmpgt packed4S" %} 12853 ins_encode %{ 12854 int vector_len = 0; 12855 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12856 %} 12857 ins_pipe( pipe_slow ); 12858 %} 12859 12860 instruct vcmpgt8S(vecX dst, vecX src1, vecX src2) %{ 12861 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12862 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12863 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12864 match(Set dst (VectorMaskCmp src1 src2)); 12865 format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed8S" %} 12866 ins_encode %{ 12867 int vector_len = 0; 12868 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12869 %} 12870 ins_pipe( pipe_slow ); 12871 %} 12872 12873 instruct vcmpgt16S(vecY dst, vecY src1, vecY src2) %{ 12874 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 12875 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12876 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12877 match(Set dst (VectorMaskCmp src1 src2)); 12878 format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed16S" %} 12879 ins_encode %{ 12880 int vector_len = 1; 12881 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12882 %} 12883 ins_pipe( pipe_slow ); 12884 %} 12885 12886 instruct vcmpgt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12887 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 12888 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 12889 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12890 match(Set dst (VectorMaskCmp src1 src2)); 12891 effect(TEMP dst, TEMP scratch); 12892 format %{ "vpcmpnlew k2,$src1,$src2\n\t" 12893 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed32S" %} 12894 ins_encode %{ 12895 int vector_len = 2; 12896 Assembler::ComparisonPredicate cmp = Assembler::nle; 12897 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12898 KRegister mask = k0; // The comparison itself is not being masked. 12899 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12900 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12901 %} 12902 ins_pipe( pipe_slow ); 12903 %} 12904 12905 instruct vcmpge4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12906 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12907 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12908 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12909 match(Set dst (VectorMaskCmp src1 src2)); 12910 effect(TEMP scratch); 12911 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 12912 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4S" %} 12913 ins_encode %{ 12914 int vector_len = 0; 12915 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12916 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12917 %} 12918 ins_pipe( pipe_slow ); 12919 %} 12920 12921 instruct vcmpge8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12922 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12923 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12924 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12925 match(Set dst (VectorMaskCmp src1 src2)); 12926 effect(TEMP scratch); 12927 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 12928 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed8S" %} 12929 ins_encode %{ 12930 int vector_len = 0; 12931 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12932 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12933 %} 12934 ins_pipe( pipe_slow ); 12935 %} 12936 12937 instruct vcmpge16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 12938 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 12939 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12940 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12941 match(Set dst (VectorMaskCmp src1 src2)); 12942 effect(TEMP scratch); 12943 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 12944 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16S" %} 12945 ins_encode %{ 12946 int vector_len = 1; 12947 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 12948 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12949 %} 12950 ins_pipe( pipe_slow ); 12951 %} 12952 12953 instruct vcmpge32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 12954 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 12955 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 12956 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12957 match(Set dst (VectorMaskCmp src1 src2)); 12958 effect(TEMP dst, TEMP scratch); 12959 format %{ "vpcmpnltw k2,$src1,$src2\n\t" 12960 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed32S" %} 12961 ins_encode %{ 12962 int vector_len = 2; 12963 Assembler::ComparisonPredicate cmp = Assembler::nlt; 12964 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 12965 KRegister mask = k0; // The comparison itself is not being masked. 12966 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 12967 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 12968 %} 12969 ins_pipe( pipe_slow ); 12970 %} 12971 12972 instruct vcmple4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 12973 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 12974 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12975 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12976 match(Set dst (VectorMaskCmp src1 src2)); 12977 effect(TEMP scratch); 12978 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 12979 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4S" %} 12980 ins_encode %{ 12981 int vector_len = 0; 12982 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12983 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 12984 %} 12985 ins_pipe( pipe_slow ); 12986 %} 12987 12988 instruct vcmple8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 12989 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 12990 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 12991 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 12992 match(Set dst (VectorMaskCmp src1 src2)); 12993 effect(TEMP scratch); 12994 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 12995 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed8S" %} 12996 ins_encode %{ 12997 int vector_len = 0; 12998 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12999 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 13000 %} 13001 ins_pipe( pipe_slow ); 13002 %} 13003 13004 instruct vcmple16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 13005 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 13006 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 13007 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13008 match(Set dst (VectorMaskCmp src1 src2)); 13009 effect(TEMP scratch); 13010 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 13011 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16S" %} 13012 ins_encode %{ 13013 int vector_len = 1; 13014 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13015 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 13016 %} 13017 ins_pipe( pipe_slow ); 13018 %} 13019 13020 instruct vcmple32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 13021 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 13022 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 13023 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13024 match(Set dst (VectorMaskCmp src1 src2)); 13025 effect(TEMP dst, TEMP scratch); 13026 format %{ "vpcmplew k2,$src1,$src2\n\t" 13027 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed32S" %} 13028 ins_encode %{ 13029 int vector_len = 2; 13030 Assembler::ComparisonPredicate cmp = Assembler::le; 13031 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 13032 KRegister mask = k0; // The comparison itself is not being masked. 13033 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 13034 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 13035 %} 13036 ins_pipe( pipe_slow ); 13037 %} 13038 13039 instruct vcmpne4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 13040 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 13041 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 13042 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13043 match(Set dst (VectorMaskCmp src1 src2)); 13044 effect(TEMP scratch); 13045 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 13046 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4S" %} 13047 ins_encode %{ 13048 int vector_len = 0; 13049 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13050 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 13051 %} 13052 ins_pipe( pipe_slow ); 13053 %} 13054 13055 instruct vcmpne8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 13056 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 13057 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 13058 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13059 match(Set dst (VectorMaskCmp src1 src2)); 13060 effect(TEMP scratch); 13061 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 13062 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed8S" %} 13063 ins_encode %{ 13064 int vector_len = 0; 13065 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13066 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 13067 %} 13068 ins_pipe( pipe_slow ); 13069 %} 13070 13071 instruct vcmpne16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 13072 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 13073 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 13074 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13075 match(Set dst (VectorMaskCmp src1 src2)); 13076 effect(TEMP scratch); 13077 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 13078 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16S" %} 13079 ins_encode %{ 13080 int vector_len = 1; 13081 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13082 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 13083 %} 13084 ins_pipe( pipe_slow ); 13085 %} 13086 13087 instruct vcmpne32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 13088 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 13089 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 13090 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13091 match(Set dst (VectorMaskCmp src1 src2)); 13092 effect(TEMP dst, TEMP scratch); 13093 format %{ "vpcmpneqw k2,$src1,$src2\n\t" 13094 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed32S" %} 13095 ins_encode %{ 13096 int vector_len = 2; 13097 Assembler::ComparisonPredicate cmp = Assembler::neq; 13098 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 13099 KRegister mask = k0; // The comparison itself is not being masked. 13100 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 13101 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 13102 %} 13103 ins_pipe( pipe_slow ); 13104 %} 13105 13106 instruct vcmpeq1L(vecD dst, vecD src1, vecD src2) %{ 13107 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 13108 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 13109 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13110 match(Set dst (VectorMaskCmp src1 src2)); 13111 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed1L" %} 13112 ins_encode %{ 13113 int vector_len = 0; 13114 __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13115 %} 13116 ins_pipe( pipe_slow ); 13117 %} 13118 13119 instruct vcmpeq2L(vecX dst, vecX src1, vecX src2) %{ 13120 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 13121 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 13122 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13123 match(Set dst (VectorMaskCmp src1 src2)); 13124 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! 
cmpeq packed2L" %} 13125 ins_encode %{ 13126 int vector_len = 0; 13127 __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13128 %} 13129 ins_pipe( pipe_slow ); 13130 %} 13131 13132 instruct vcmpeq4L(vecY dst, vecY src1, vecY src2) %{ 13133 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 13134 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 13135 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13136 match(Set dst (VectorMaskCmp src1 src2)); 13137 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed4L" %} 13138 ins_encode %{ 13139 int vector_len = 1; 13140 __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13141 %} 13142 ins_pipe( pipe_slow ); 13143 %} 13144 13145 instruct vcmpeq8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 13146 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 13147 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 13148 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13149 match(Set dst (VectorMaskCmp src1 src2)); 13150 effect(TEMP dst, TEMP scratch); 13151 format %{ "vpcmpeqq k2,$src1,$src2\n\t" 13152 "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8L" %} 13153 ins_encode %{ 13154 int vector_len = 2; 13155 Assembler::ComparisonPredicate cmp = Assembler::eq; 13156 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 13157 KRegister mask = k0; // The comparison itself is not being masked. 13158 __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 13159 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 13160 %} 13161 ins_pipe( pipe_slow ); 13162 %} 13163 13164 instruct vcmplt1L(vecD dst, vecD src1, vecD src2) %{ 13165 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 13166 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 13167 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13168 match(Set dst (VectorMaskCmp src1 src2)); 13169 format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed1L" %} 13170 ins_encode %{ 13171 int vector_len = 0; 13172 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 13173 %} 13174 ins_pipe( pipe_slow ); 13175 %} 13176 13177 instruct vcmplt2L(vecX dst, vecX src1, vecX src2) %{ 13178 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 13179 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 13180 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13181 match(Set dst (VectorMaskCmp src1 src2)); 13182 format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed2L" %} 13183 ins_encode %{ 13184 int vector_len = 0; 13185 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 13186 %} 13187 ins_pipe( pipe_slow ); 13188 %} 13189 13190 instruct vcmplt4L(vecY dst, vecY src1, vecY src2) %{ 13191 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 13192 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 13193 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13194 match(Set dst (VectorMaskCmp src1 src2)); 13195 format %{ "vpcmpgtq $dst,$src2,$src1\t! 
cmplt packed4L" %} 13196 ins_encode %{ 13197 int vector_len = 1; 13198 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 13199 %} 13200 ins_pipe( pipe_slow ); 13201 %} 13202 13203 instruct vcmplt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 13204 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 13205 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 13206 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13207 match(Set dst (VectorMaskCmp src1 src2)); 13208 effect(TEMP dst, TEMP scratch); 13209 format %{ "vpcmpnleq k2,$src1,$src2\n\t" 13210 "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8L" %} 13211 ins_encode %{ 13212 int vector_len = 2; 13213 Assembler::ComparisonPredicate cmp = Assembler::lt; 13214 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 13215 KRegister mask = k0; // The comparison itself is not being masked. 13216 __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 13217 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 13218 %} 13219 ins_pipe( pipe_slow ); 13220 %} 13221 13222 instruct vcmpgt1L(vecD dst, vecD src1, vecD src2) %{ 13223 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 13224 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 13225 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13226 match(Set dst (VectorMaskCmp src1 src2)); 13227 format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed1L" %} 13228 ins_encode %{ 13229 int vector_len = 0; 13230 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13231 %} 13232 ins_pipe( pipe_slow ); 13233 %} 13234 13235 instruct vcmpgt2L(vecX dst, vecX src1, vecX src2) %{ 13236 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 13237 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 13238 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13239 match(Set dst (VectorMaskCmp src1 src2)); 13240 format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed2L" %} 13241 ins_encode %{ 13242 int vector_len = 0; 13243 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13244 %} 13245 ins_pipe( pipe_slow ); 13246 %} 13247 13248 instruct vcmpgt4L(vecY dst, vecY src1, vecY src2) %{ 13249 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 13250 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 13251 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13252 match(Set dst (VectorMaskCmp src1 src2)); 13253 format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed4L" %} 13254 ins_encode %{ 13255 int vector_len = 1; 13256 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13257 %} 13258 ins_pipe( pipe_slow ); 13259 %} 13260 13261 instruct vcmpgt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 13262 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 13263 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 13264 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13265 match(Set dst (VectorMaskCmp src1 src2)); 13266 effect(TEMP dst, TEMP scratch); 13267 format %{ "vpcmpnleq k2,$src1,$src2\n\t" 13268 "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpgt packed8L" %} 13269 ins_encode %{ 13270 int vector_len = 2; 13271 Assembler::ComparisonPredicate cmp = Assembler::nle; 13272 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 13273 KRegister mask = k0; // The comparison itself is not being masked. 13274 __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 13275 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 13276 %} 13277 ins_pipe( pipe_slow ); 13278 %} 13279 13280 instruct vcmpge1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 13281 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 13282 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 13283 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13284 match(Set dst (VectorMaskCmp src1 src2)); 13285 effect(TEMP scratch); 13286 format %{ "vpcmpgtq $dst,$src2,$src1\n\t" 13287 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed1L" %} 13288 ins_encode %{ 13289 int vector_len = 0; 13290 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 13291 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 13292 %} 13293 ins_pipe( pipe_slow ); 13294 %} 13295 13296 instruct vcmpge2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 13297 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 13298 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 13299 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13300 match(Set dst (VectorMaskCmp src1 src2)); 13301 effect(TEMP scratch); 13302 format %{ "vpcmpgtq $dst,$src2,$src1\n\t" 13303 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2L" %} 13304 ins_encode %{ 13305 int vector_len = 0; 13306 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 13307 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 13308 %} 13309 ins_pipe( pipe_slow ); 13310 %} 13311 13312 instruct vcmpge4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 13313 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 13314 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 13315 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13316 match(Set dst (VectorMaskCmp src1 src2)); 13317 effect(TEMP scratch); 13318 format %{ "vpcmpgtq $dst,$src2,$src1\n\t" 13319 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4L" %} 13320 ins_encode %{ 13321 int vector_len = 1; 13322 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 13323 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 13324 %} 13325 ins_pipe( pipe_slow ); 13326 %} 13327 13328 instruct vcmpge8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 13329 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 13330 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 13331 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13332 match(Set dst (VectorMaskCmp src1 src2)); 13333 effect(TEMP dst, TEMP scratch); 13334 format %{ "vpcmpnltq k2,$src1,$src2\n\t" 13335 "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8L" %} 13336 ins_encode %{ 13337 int vector_len = 2; 13338 Assembler::ComparisonPredicate cmp = Assembler::nlt; 13339 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
instruct vcmpge1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

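// The VectorBlend rules below select lanes from two inputs under a 0/-1 lane
// mask. Three lowerings are used: pre-AVX SSE4.1 blendv*/pblendvb reads its
// mask implicitly from xmm0 (hence the rxmm0 operand and the two-address
// dst/src form), AVX vblendv*/vpblendvb takes the mask as an explicit fourth
// operand, and AVX-512 first converts the mask vector into a k register (by
// comparing it for equality with the all-ones constant) and then issues a
// k-masked blend. A scalar sketch of the selection (illustration only):
//
//   // for each lane i:
//   //   dst[i] = mask[i] ? src2[i] : src1[i];
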
instruct blendvps2F(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvps $dst,$src,$mask\t! blend packed2F" %}
  ins_encode %{
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps2F(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! blend packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvps4F(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvps $dst,$src,$mask\t! blend packed4F" %}
  ins_encode %{
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps4F(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! blend packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps8F(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! blend packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps16F(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t"
            "vblendmps $dst,k2,$src1,$src2\t! blend packed16F" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpeqd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
    __ evblendmps($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd8D(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vblendmpd $dst,k2,$src1,$src2\t! blend packed8D" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evblendmpd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmb64B(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqb k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmb $dst,k2,$src1,$src2\t! blend packed64B" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpb(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmb($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmw32S(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT && VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqw k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmw $dst,k2,$src1,$src2\t! blend packed32S" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpw(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmw($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmd16I(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmd $dst,k2,$src1,$src2\t! blend packed16I" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmq8L(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmq $dst,k2,$src1,$src2\t! blend packed8L" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmq($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

blend packed8I" %} 13671 ins_encode %{ 13672 int vector_len = 1; 13673 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13674 %} 13675 ins_pipe( pipe_slow ); 13676 %} 13677 13678 instruct pblendvb8B(vecD dst, vecD src, rxmm0 mask) %{ 13679 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 13680 match(Set dst (VectorBlend (Binary dst src) mask)); 13681 format %{ "pblendvb $dst,$src,$mask\t! blend packed8B" %} 13682 ins_encode %{ 13683 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 13684 %} 13685 ins_pipe( pipe_slow ); 13686 %} 13687 13688 instruct vpblendvb8B(vecD dst, vecD src1, vecD src2, vecD mask) %{ 13689 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 13690 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13691 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8B" %} 13692 ins_encode %{ 13693 int vector_len = 0; 13694 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13695 %} 13696 ins_pipe( pipe_slow ); 13697 %} 13698 13699 instruct pblendvb16B(vecX dst, vecX src, rxmm0 mask) %{ 13700 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 13701 match(Set dst (VectorBlend (Binary dst src) mask)); 13702 format %{ "pblendvb $dst,$src,$mask\t! blend packed16B" %} 13703 ins_encode %{ 13704 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 13705 %} 13706 ins_pipe( pipe_slow ); 13707 %} 13708 13709 instruct vpblendvb16B(vecX dst, vecX src1, vecX src2, vecX mask) %{ 13710 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 13711 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13712 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16B" %} 13713 ins_encode %{ 13714 int vector_len = 0; 13715 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13716 %} 13717 ins_pipe( pipe_slow ); 13718 %} 13719 13720 instruct vpblendvb32B(vecY dst, vecY src1, vecY src2, vecY mask) %{ 13721 predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 13722 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13723 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed32B" %} 13724 ins_encode %{ 13725 int vector_len = 1; 13726 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13727 %} 13728 ins_pipe( pipe_slow ); 13729 %} 13730 13731 instruct pblendvb4S(vecD dst, vecD src, rxmm0 mask) %{ 13732 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13733 match(Set dst (VectorBlend (Binary dst src) mask)); 13734 format %{ "pblendvb $dst,$src,$mask\t! blend packed4S" %} 13735 ins_encode %{ 13736 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 13737 %} 13738 ins_pipe( pipe_slow ); 13739 %} 13740 13741 instruct vpblendvb4S(vecD dst, vecD src1, vecD src2, vecD mask) %{ 13742 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13743 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13744 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! 
blend packed4S" %} 13745 ins_encode %{ 13746 int vector_len = 0; 13747 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13748 %} 13749 ins_pipe( pipe_slow ); 13750 %} 13751 13752 instruct pblendvb8S(vecX dst, vecX src, rxmm0 mask) %{ 13753 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13754 match(Set dst (VectorBlend (Binary dst src) mask)); 13755 format %{ "pblendvb $dst,$src,$mask\t! blend packed8S" %} 13756 ins_encode %{ 13757 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 13758 %} 13759 ins_pipe( pipe_slow ); 13760 %} 13761 13762 instruct vpblendvb8S(vecX dst, vecX src1, vecX src2, vecX mask) %{ 13763 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13764 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13765 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8S" %} 13766 ins_encode %{ 13767 int vector_len = 0; 13768 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13769 %} 13770 ins_pipe( pipe_slow ); 13771 %} 13772 13773 instruct vpblendvb16S(vecY dst, vecY src1, vecY src2, vecY mask) %{ 13774 predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 13775 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13776 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16S" %} 13777 ins_encode %{ 13778 int vector_len = 1; 13779 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13780 %} 13781 ins_pipe( pipe_slow ); 13782 %} 13783 13784 instruct pblendvb1L(vecD dst, vecD src, rxmm0 mask) %{ 13785 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13786 match(Set dst (VectorBlend (Binary dst src) mask)); 13787 format %{ "pblendvb $dst,$src,$mask\t! blend packed1L" %} 13788 ins_encode %{ 13789 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 13790 %} 13791 ins_pipe( pipe_slow ); 13792 %} 13793 13794 instruct vpblendvb1L(vecD dst, vecD src1, vecD src2, vecD mask) %{ 13795 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13796 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13797 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed1L" %} 13798 ins_encode %{ 13799 int vector_len = 0; 13800 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13801 %} 13802 ins_pipe( pipe_slow ); 13803 %} 13804 13805 instruct pblendvb2L(vecX dst, vecX src, rxmm0 mask) %{ 13806 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13807 match(Set dst (VectorBlend (Binary dst src) mask)); 13808 format %{ "pblendvb $dst,$src,$mask\t! blend packed2L" %} 13809 ins_encode %{ 13810 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 13811 %} 13812 ins_pipe( pipe_slow ); 13813 %} 13814 13815 instruct vpblendvb2L(vecX dst, vecX src1, vecX src2, vecX mask) %{ 13816 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13817 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13818 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! 
blend packed2L" %} 13819 ins_encode %{ 13820 int vector_len = 0; 13821 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13822 %} 13823 ins_pipe( pipe_slow ); 13824 %} 13825 13826 instruct vpblendvb4L(vecY dst, vecY src1, vecY src2, vecY mask) %{ 13827 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 13828 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13829 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4L" %} 13830 ins_encode %{ 13831 int vector_len = 1; 13832 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13833 %} 13834 ins_pipe( pipe_slow ); 13835 %} 13836 13837 instruct blendvpd1D(vecD dst, vecD src, rxmm0 mask) %{ 13838 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 13839 match(Set dst (VectorBlend (Binary dst src) mask)); 13840 format %{ "blendvpd $dst,$src,$mask\t! packed1D" %} 13841 ins_encode %{ 13842 __ blendvpd($dst$$XMMRegister, $src$$XMMRegister); 13843 %} 13844 ins_pipe( pipe_slow ); 13845 %} 13846 13847 instruct vblendvpd1D(vecD dst, vecD src1, vecD src2, vecD mask) %{ 13848 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 13849 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13850 format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed1D" %} 13851 ins_encode %{ 13852 int vector_len = 0; 13853 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13854 %} 13855 ins_pipe( pipe_slow ); 13856 %} 13857 13858 instruct blendvpd2D(vecX dst, vecX src, rxmm0 mask) %{ 13859 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 13860 match(Set dst (VectorBlend (Binary dst src) mask)); 13861 format %{ "blendvpd $dst,$src,$mask\t! packed2D" %} 13862 ins_encode %{ 13863 __ blendvpd($dst$$XMMRegister, $src$$XMMRegister); 13864 %} 13865 ins_pipe( pipe_slow ); 13866 %} 13867 13868 instruct vblendvpd2D(vecX dst, vecX src1, vecX src2, vecX mask) %{ 13869 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 13870 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13871 format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed2D" %} 13872 ins_encode %{ 13873 int vector_len = 0; 13874 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13875 %} 13876 ins_pipe( pipe_slow ); 13877 %} 13878 13879 instruct vblendvpd4D(vecY dst, vecY src1, vecY src2, vecY mask) %{ 13880 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 13881 match(Set dst (VectorBlend (Binary src1 src2) mask)); 13882 format %{ "vblendvpd $dst,$src1,$src2,$mask\t! 
packed4D" %} 13883 ins_encode %{ 13884 int vector_len = 1; 13885 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 13886 %} 13887 ins_pipe( pipe_slow ); 13888 %} 13889 13890 // --------------------------------- NEG -------------------------------------- 13891 // a = -a 13892 instruct vneg2I_reg(vecD dst, vecD src) %{ 13893 predicate(UseSSE > 1 && n->as_Vector()->length() == 2); 13894 match(Set dst (NegVI src)); 13895 effect(TEMP dst); 13896 format %{ "pxor $dst,$dst\n\t" 13897 "psubd $dst, $src\t! neg packed2I" %} 13898 ins_cost(150); 13899 ins_encode %{ 13900 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 13901 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 13902 %} 13903 ins_pipe( pipe_slow ); 13904 %} 13905 13906 instruct vneg4I_reg(vecX dst, vecX src) %{ 13907 predicate(UseSSE > 1 && n->as_Vector()->length() == 4); 13908 match(Set dst (NegVI src)); 13909 effect(TEMP dst); 13910 format %{ "pxor $dst,$dst\n\t" 13911 "psubd $dst, $src\t! neg packed4I" %} 13912 ins_cost(150); 13913 ins_encode %{ 13914 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 13915 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 13916 %} 13917 ins_pipe( pipe_slow ); 13918 %} 13919 13920 instruct vneg8I_reg(vecY dst, vecY src, vecY tmp) %{ 13921 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 13922 match(Set dst (NegVI src)); 13923 effect(TEMP tmp); 13924 format %{ "vpxor $tmp,$tmp,$tmp\n\t" 13925 "vpsubd $dst,$tmp,$src\t! neg packed8I" %} 13926 ins_cost(150); 13927 ins_encode %{ 13928 int vector_len = 1; 13929 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 13930 __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 13931 %} 13932 ins_pipe( pipe_slow ); 13933 %} 13934 13935 instruct vneg16I_reg(vecZ dst, vecZ src, vecZ tmp) %{ 13936 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 13937 match(Set dst (NegVI src)); 13938 effect(TEMP tmp); 13939 format %{ "vpxor $tmp,$tmp,$tmp\n\t" 13940 "vpsubd $dst,$tmp,$src\t! 
neg packed16I" %} 13941 ins_cost(150); 13942 ins_encode %{ 13943 int vector_len = 2; 13944 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 13945 __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 13946 %} 13947 ins_pipe( pipe_slow ); 13948 %} 13949 13950 instruct vneg1D(regD dst) %{ 13951 predicate((UseSSE>=2) && (UseAVX == 0)); 13952 match(Set dst (NegVD dst)); 13953 ins_cost(150); 13954 format %{ "xorpd $dst,[0x8000000000000000] \t# $dst = -$dst neg packed1D" %} 13955 ins_encode %{ 13956 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 13957 %} 13958 ins_pipe(pipe_slow); 13959 %} 13960 13961 instruct vneg1D_reg(vecX dst, vecX src) %{ 13962 predicate(UseAVX > 0 && n->as_Vector()->length() == 1); 13963 match(Set dst (NegVD src)); 13964 format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed1D" %} 13965 ins_cost(150); 13966 ins_encode %{ 13967 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 13968 ExternalAddress(double_signflip())); 13969 %} 13970 ins_pipe( pipe_slow ); 13971 %} 13972 13973 instruct vneg2D_reg(vecX dst) %{ 13974 predicate((UseSSE>=2)); 13975 match(Set dst (NegVD dst)); 13976 ins_cost(150); 13977 format %{ "xorpd $dst,[0x8000000000000000]\t# $dst = -$dst neg packed2D" %} 13978 ins_encode %{ 13979 __ xorpd($dst$$XMMRegister, ExternalAddress(vector_double_signflip())); 13980 %} 13981 ins_pipe(pipe_slow); 13982 %} 13983 13984 13985 instruct vneg4D_reg(vecY dst, vecY src) %{ 13986 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13987 match(Set dst (NegVD src)); 13988 format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed4D" %} 13989 ins_cost(150); 13990 ins_encode %{ 13991 int vector_len = 1; 13992 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len); 13993 %} 13994 ins_pipe( pipe_slow ); 13995 %} 13996 13997 instruct vneg8D_reg(vecZ dst, vecZ src) %{ 13998 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 13999 match(Set dst (NegVD src)); 14000 format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed8D" %} 14001 ins_cost(150); 14002 ins_encode %{ 14003 int vector_len = 2; 14004 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len); 14005 %} 14006 ins_pipe( pipe_slow ); 14007 %} 14008 14009 instruct vneg2F_reg(vecD dst) %{ 14010 predicate(UseSSE > 0 && n->as_Vector()->length() == 2); 14011 match(Set dst (NegVF dst)); 14012 format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed2F" %} 14013 ins_cost(150); 14014 ins_encode %{ 14015 __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip())); 14016 %} 14017 ins_pipe( pipe_slow ); 14018 %} 14019 14020 instruct vneg4F_reg(vecX dst) %{ 14021 predicate(UseSSE > 0 && n->as_Vector()->length() == 4); 14022 match(Set dst (NegVF dst)); 14023 format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed4F" %} 14024 ins_cost(150); 14025 ins_encode %{ 14026 __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip())); 14027 %} 14028 ins_pipe( pipe_slow ); 14029 %} 14030 14031 instruct vneg8F_reg(vecY dst, vecY src) %{ 14032 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 14033 match(Set dst (NegVF src)); 14034 format %{ "vxorps $dst,$src\t# $dst = -$src neg packed8F" %} 14035 ins_cost(150); 14036 ins_encode %{ 14037 int vector_len = 1; 14038 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len); 14039 %} 14040 ins_pipe( pipe_slow ); 14041 %} 14042 14043 instruct vneg16F_reg(vecZ 
// --------------------------------- ABS --------------------------------------
// a = |a|

instruct vabs2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8I_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVI src));
  format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16I_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVI src));
  format %{ "evpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs1D_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 1);
  match(Set dst (AbsVD dst));
  format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed1D" %}
  ins_cost(150);
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2D_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVD dst));
  format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed2D" %}
  ins_cost(150);
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVD src));
  format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVD src));
  format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

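// Floating-point |x| likewise only touches the sign bit: AND with a sign-mask
// constant (0x7FFFFFFF per float lane, 0x7FFFFFFFFFFFFFFF per double lane)
// clears it, while integer abs uses the dedicated (v)pabsd. A scalar sketch
// of the double case (illustration only):
//
//   double absd(double d) {
//     uint64_t bits;
//     memcpy(&bits, &d, sizeof(bits));
//     bits &= 0x7FFFFFFFFFFFFFFFull;        // andpd with the signmask
//     memcpy(&d, &bits, sizeof(bits));
//     return d;                             // == fabs(original d)
//   }
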
instruct vabs2F_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVF dst));
  format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed2F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- NOT --------------------------------------------

instruct vnot4B(vecS dst, vecS src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "movdl $dst,[all_ones]\n\t"
            "pxor $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot4B_reg(vecS dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "movq $dst,[all_ones]\n\t"
            "pxor $dst,$src\t! not vectors (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B_reg(vecD dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

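// There is no vector NOT instruction on x86, so ~x is an XOR against the
// all-ones constant. The SSE forms must first load that constant into dst
// (movdl/movq/movdqu), while the AVX forms XOR against it in memory and only
// need a scratch register for its address. A scalar sketch (illustration
// only):
//
//   int not32(int x) {
//     return x ^ -1;                        // i.e. ~x; vpxor with [all_ones]
//   }
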
instruct vnot16B(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "movdqu $dst,[all_ones]\n\t"
            "pxor $dst,$src\t! not vectors (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B_reg(vecX dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot32B_reg(vecY dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot64B_reg(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest4inae(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setne $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

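// The VectorTest rules map vptest's flag outputs to a 0/1 integer: vptest
// sets ZF when (src1 AND src2) == 0 and CF when (src2 AND NOT src1) == 0.
// The "inae" variants read CF (setb, Assembler::carrySet) to test that every
// set bit of src2 is also set in src1; the "ieq" variants read ZF's
// complement (setne, Assembler::notZero) to test that the operands intersect.
// The trailing movzbl widens the setcc byte to a clean 32-bit 0/1. A scalar
// sketch (illustration only):
//
//   // inae: dst = ((src2 & ~src1) == 0) ? 1 : 0;   // vptest + setb
//   // ieq:  dst = ((src1 &  src2) != 0) ? 1 : 0;   // vptest + setne
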
instruct vptest4inae(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setne $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 1;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest $src1,$src2\n\t"
            "setne $dst\t!" %}
  ins_encode %{
    int vector_len = 1;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
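
// vptest sets ZF from (src1 AND src2) and CF from (NOT src1 AND src2); the
// rules above simply materialize one of those flags as a 0/1 integer.
// Illustrative sketch of the flag computation, modeling a 128-bit value
// as 4 x 32 bits:
//
//   bool zf = true, cf = true;
//   for (int i = 0; i < 4; i++) {
//     if (src1[i] & src2[i])  zf = false;  // some common bit is set
//     if (~src1[i] & src2[i]) cf = false;  // src2 is not a subset of src1
//   }
//
// setb captures CF for the carrySet predicate, setne captures !ZF for the
// notZero predicate, and movzbl widens the resulting byte to 32 bits.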
instruct loadmask8b(vecD dst, vecD src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (8B to 8B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16b(vecX dst, vecX src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (16B to 16B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32b(vecY dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask64b(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4s(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (4B to 4S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8s(vecX dst, vecD src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (8B to 8S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16s(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (16B to 16S)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32s(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (32B to 32S)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 1);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
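
// VectorLoadMask turns a canonical 0/1 byte mask into all-zeros/all-ones
// lanes. The rules above compute 0 - x per byte: subtracting 1 from 0
// wraps to 0xFF, while 0 stays 0. Illustrative sketch for one byte lane:
//
//   uint8_t lane_mask(uint8_t b) {  // b is 0 or 1
//     return (uint8_t)(0 - b);      // 1 -> 0xFF, 0 -> 0x00
//   }
//
// For wider elements the negated bytes are then sign-extended (pmovsxbw
// and friends), replicating the 0x00/0xFF byte across the lane; that is
// why some AVX rules mix vector lengths, running the xor/subtract at the
// narrow source width and only the sign-extension at the full width.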
instruct loadmask2i(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (2B to 2I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4i(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (4B to 4I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8i(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbd $dst\t! load mask (8B to 8I)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16i(vecZ dst, vecX src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbd $tmp,$src\n\t"
            "vpsubd $dst,$tmp\t! load mask (16B to 16I)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbd($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask1l(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (1B to 1L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2l(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (2B to 2L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
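
// Note the reversed order in loadmask16i (above) and loadmask8l (below):
// they zero-extend first (vpmovzxbd/vpmovzxbq) and subtract at the full
// element width. Subtracting bytes in a 512-bit register (vpsubb) would
// require AVX512BW, whereas vpsubd/vpsubq need only base AVX-512; the
// result is the same, since 0 - zext(1) still produces an all-ones lane.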
instruct loadmask4l(vecY dst, vecS src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbq $dst\t! load mask (4B to 4L)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8l(vecZ dst, vecD src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbq $tmp,$src\n\t"
            "vpsubq $dst,$tmp\t! load mask (8B to 8L)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbq($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8b(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (8B to 8B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16b(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (16B to 16B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask32b(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask64b(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqb k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp because k registers are not allocated.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpb(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
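
// VectorStoreMask is the inverse of VectorLoadMask: collapse all-zeros/
// all-ones lanes back to canonical 0/1 bytes. For byte lanes a packed
// absolute value suffices, since |-1| == 1 and |0| == 0. Illustrative
// sketch for one lane:
//
//   uint8_t store_mask(int8_t m) {        // m is 0 or -1
//     return (uint8_t)(m < 0 ? -m : m);   // -1 -> 1, 0 -> 0
//   }
//
// The 512-bit form instead compares the mask against all-ones into a k
// register and does a merge-masked broadcast of 0x01 bytes
// (vector_byte_bitset) into the destination.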
instruct storemask4s(vecS dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsw $dst,$src\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4S to 4B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8s(vecD dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsw $dst,$src\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (8S to 8B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16s(vecX dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpabsw $dst,$src\n\t"
            "vextracti128 $tmp,$dst\n\t"
            "vpackuswb $dst,$dst,$tmp\t! store mask (16S to 16B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask32s(vecY dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqw k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (32S to 32B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp because k registers are not allocated.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpw(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    // The dst is only 256-bit wide, so a smaller (vector_len = 1) move suffices.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 1, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
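
// The short-to-byte forms narrow with vpackuswb after the absolute value:
// each 0x0001 word packs, with unsigned saturation, to a 0x01 byte.
// Illustrative sketch of the saturating narrow for one word:
//
//   uint8_t pack_word(uint16_t w) {          // w is 0x0000 or 0x0001 here
//     return w > 0xFF ? 0xFF : (uint8_t)w;   // unsigned saturate to byte
//   }
//
// Packing works within each 128-bit lane, which is why the 256-bit source
// first needs vextracti128 to bring the upper lane next to the lower one.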
instruct storemask2i(vecS dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (2I to 2B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask4i(vecS dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4I to 4B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8i(vecD dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubd $dst,$src\n\t"
            "vextracti128 $tmp,$dst\n\t"
            "vpackusdw $dst,$dst,$tmp\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (8I to 8B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16i(vecX dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (16I to 16B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp because k registers are not allocated.
    __ evpcmpeqd(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
    // The dst is only 128-bit wide, so a smaller (vector_len = 0) move suffices.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 0, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
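
// Int lanes need two packing steps: vpackusdw narrows 0x00000001 dwords to
// 0x0001 words, then vpackuswb narrows those words to 0x01 bytes. As with
// the short forms, packing is per-128-bit lane, so the 256-bit source is
// folded with vextracti128 before the final pack.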
instruct storemask1l(vecS dst, vecD src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (1L to 1B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask2l(vecS dst, vecX src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  format %{ "vpshufd $dst,$src,0x8\n\t"
            "vpabsd $dst,$dst\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (2L to 2B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8, vector_len);
    __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask4l(vecS dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch, TEMP dst);
  format %{ "vmovdqu $dst,[0,2,4,6,1,3,5,7]\n\t"
            "vpermd $dst,$dst,$src\n\t"
            "vpabsd $dst,$dst\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4L to 4B)" %}
  ins_encode %{
    // vpermd and the load are 256-bit; all the other instructions are 128-bit.
    int vector_len = 0;
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_long_perm_mask()), $scratch$$Register);
    __ vpermd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister);
    __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8l(vecD dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (8L to 8B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp because k registers are not allocated.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpq(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    // The dst is only 128-bit wide, so a smaller (vector_len = 0) move suffices.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 0, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
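
// Long lanes are first compressed to 32 bits per element: storemask2l
// keeps the even dwords with vpshufd 0x8, and storemask4l gathers them
// across lanes with vpermd and the [0,2,4,6,...] permutation constant
// (vector_long_perm_mask). A 0/-1 long has identical upper and lower
// dwords, so dropping the upper half loses nothing, and the int narrowing
// sequence then applies unchanged.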
// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
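
// FmaVD/FmaVF compute c = a * b + c with a single fused multiply-add:
// one rounding step instead of two. Illustrative scalar sketch of what
// each lane computes, using C++'s std::fma:
//
//   #include <cmath>
//   double fma_lane(double a, double b, double c) {
//     return std::fma(a, b, c);  // rounded once, after the full product
//   }
//
// The _mem variants fold the LoadVector of b directly into the FMA's
// memory operand, saving a separate vector load.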
// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
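
// All of the FMA rules above are guarded by UseFMA, which the VM enables
// only on CPUs that report FMA support, so FmaV nodes never reach the
// matcher without hardware fused multiply-add.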