//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, i.e. 16 32-bit words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX-enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
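// Editorial note: in addition to the four fields documented above, each
// definition below carries a fifth argument, the concrete VMReg slot that
// backs the name. As an annotated illustration (this repeats the first
// definition below, nothing new is defined):
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//
// reads as: allocator save type Save-On-Call, C convention save type
// Save-On-Call, ideal register type Op_RegF, encoding bit-pattern 0, backed
// by the first 32-bit slot of xmm0; the lettered names (XMM0b..XMM0p) cover
// the remaining slots via ->next(1)..->next(15).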
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
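// Editorial note (bookkeeping only, no new definitions): chunk1 above lists
// every 32-bit slot of the XMM file in allocation order, 8 registers x 16
// slots = 128 slots without _LP64, and 32 registers x 16 slots = 512 slots
// with it.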
// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);

%}
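// Editorial note on the reg_class_dynamic entries in the block above: each
// pairs an EVEX class (XMM0-XMM31) with a legacy class (XMM0-XMM15) plus a
// runtime predicate, so a single register class name serves both CPU
// generations. Conceptually (an illustrative sketch with made-up mask names,
// not generated ADLC code):
//
//   // pick the mask handed to the register allocator, based on the CPU
//   RegMask vectorx_reg_mask = VM_Version::supports_evex()
//                            ? vectorx_reg_evex_mask
//                            : vectorx_reg_legacy_mask;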
rest of the architecture description 1080 1081 source_hpp %{ 1082 // Header information of the source block. 1083 // Method declarations/definitions which are used outside 1084 // the ad-scope can conveniently be defined here. 1085 // 1086 // To keep related declarations/definitions/uses close together, 1087 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1088 1089 class NativeJump; 1090 1091 class CallStubImpl { 1092 1093 //-------------------------------------------------------------- 1094 //---< Used for optimization in Compile::shorten_branches >--- 1095 //-------------------------------------------------------------- 1096 1097 public: 1098 // Size of call trampoline stub. 1099 static uint size_call_trampoline() { 1100 return 0; // no call trampolines on this platform 1101 } 1102 1103 // Number of relocations needed by a call trampoline stub. 1104 static uint reloc_call_trampoline() { 1105 return 0; // no call trampolines on this platform 1106 } 1107 }; 1108 1109 class HandlerImpl { 1110 1111 public: 1112 1113 static int emit_exception_handler(CodeBuffer &cbuf); 1114 static int emit_deopt_handler(CodeBuffer& cbuf); 1115 1116 static uint size_exception_handler() { 1117 // NativeCall instruction size is the same as NativeJump. 1118 // The exception handler starts out as a jump and can be patched to 1119 // a call by deoptimization. (4932387) 1120 // Note that this value is also credited (in output.cpp) to 1121 // the size of the code section. 1122 return NativeJump::instruction_size; 1123 } 1124 1125 #ifdef _LP64 1126 static uint size_deopt_handler() { 1127 // three 5 byte instructions 1128 return 15; 1129 } 1130 #else 1131 static uint size_deopt_handler() { 1132 // NativeCall instruction size is the same as NativeJump. 1133 // The exception handler starts out as a jump and can be patched to 1134 // a call by deoptimization. (4932387) 1135 // Note that this value is also credited (in output.cpp) to 1136 // the size of the code section. 1137 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1138 } 1139 #endif 1140 }; 1141 1142 %} // end source_hpp 1143 1144 source %{ 1145 1146 #include "opto/addnode.hpp" 1147 1148 // Emit exception handler code. 1149 // Stuff framesize into a register and call a VM stub routine. 1150 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1151 1152 // Note that the code buffer's insts_mark is always relative to insts. 1153 // That's why we must use the macroassembler to generate a handler. 1154 MacroAssembler _masm(&cbuf); 1155 address base = __ start_a_stub(size_exception_handler()); 1156 if (base == NULL) { 1157 ciEnv::current()->record_failure("CodeCache is full"); 1158 return 0; // CodeBuffer::expand failed 1159 } 1160 int offset = __ offset(); 1161 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1162 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1163 __ end_a_stub(); 1164 return offset; 1165 } 1166 1167 // Emit deopt handler code. 1168 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1169 1170 // Note that the code buffer's insts_mark is always relative to insts. 1171 // That's why we must use the macroassembler to generate a handler.
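// Note on the LP64 path below: it materializes the handler's own pc on the
// stack without clobbering any register (they may all be live). The "call"
// to the immediately following label pushes the address of the byte after
// the call, and the subsequent subptr rewinds that stack slot by the number
// of bytes emitted since the handler entry, leaving "the_pc" on top before
// the jump to the deopt blob.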
1172 MacroAssembler _masm(&cbuf); 1173 address base = __ start_a_stub(size_deopt_handler()); 1174 if (base == NULL) { 1175 ciEnv::current()->record_failure("CodeCache is full"); 1176 return 0; // CodeBuffer::expand failed 1177 } 1178 int offset = __ offset(); 1179 1180 #ifdef _LP64 1181 address the_pc = (address) __ pc(); 1182 Label next; 1183 // push a "the_pc" on the stack without destroying any registers 1184 // as they all may be live. 1185 1186 // push address of "next" 1187 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1188 __ bind(next); 1189 // adjust it so it matches "the_pc" 1190 __ subptr(Address(rsp, 0), __ offset() - offset); 1191 #else 1192 InternalAddress here(__ pc()); 1193 __ pushptr(here.addr()); 1194 #endif 1195 1196 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1197 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1198 __ end_a_stub(); 1199 return offset; 1200 } 1201 1202 1203 //============================================================================= 1204 1205 // Float masks come from different places depending on platform. 1206 #ifdef _LP64 1207 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1208 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1209 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1210 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1211 static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); } 1212 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); } 1213 static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); } 1214 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); } 1215 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1216 static address vector_byte_bitset() { return StubRoutines::x86::vector_byte_bitset(); } 1217 static address vector_long_perm_mask() { return StubRoutines::x86::vector_long_perm_mask(); } 1218 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1219 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1220 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1221 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1222 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1223 #else 1224 static address float_signmask() { return (address)float_signmask_pool; } 1225 static address float_signflip() { return (address)float_signflip_pool; } 1226 static address double_signmask() { return (address)double_signmask_pool; } 1227 static address double_signflip() { return (address)double_signflip_pool; } 1228 #endif 1229 1230 1231 const bool Matcher::match_rule_supported(int opcode) { 1232 if (!has_match_rule(opcode)) 1233 return false; 1234 1235 bool ret_value = true; 1236 switch (opcode) { 1237 case Op_PopCountI: 1238 case Op_PopCountL: 1239 if (!UsePopCountInstruction) 1240 ret_value = false; 1241 break; 1242 case Op_PopCountVI: 1243 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq()) 1244 ret_value = false; 1245 break; 1246 case Op_MulVB: 1247 case Op_MulVI: 1248 case Op_MulVL: 1249 if ((UseSSE < 4) && (UseAVX 
< 1)) // only with SSE4_1 or AVX 1250 ret_value = false; 1251 break; 1252 case Op_MulReductionVL: 1253 if (VM_Version::supports_avx512dq() == false) 1254 ret_value = false; 1255 break; 1256 case Op_AddReductionVL: 1257 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1258 ret_value = false; 1259 break; 1260 case Op_AddReductionVI: 1261 if (UseSSE < 3) // requires at least SSE3 1262 ret_value = false; 1263 break; 1264 case Op_MulReductionVI: 1265 if (UseSSE < 4) // requires at least SSE4 1266 ret_value = false; 1267 break; 1268 case Op_AddReductionVF: 1269 case Op_AddReductionVD: 1270 case Op_MulReductionVF: 1271 case Op_MulReductionVD: 1272 if (UseSSE < 1) // requires at least SSE 1273 ret_value = false; 1274 break; 1275 case Op_AndReductionV: 1276 if (UseSSE < 2) // requires at least SSE2 1277 ret_value = false; 1278 break; 1279 case Op_SqrtVD: 1280 case Op_SqrtVF: 1281 if (UseAVX < 1) // enabled for AVX only 1282 ret_value = false; 1283 break; 1284 case Op_CompareAndSwapL: 1285 #ifdef _LP64 1286 case Op_CompareAndSwapP: 1287 #endif 1288 if (!VM_Version::supports_cx8()) 1289 ret_value = false; 1290 break; 1291 case Op_CMoveVF: 1292 case Op_CMoveVD: 1293 if (UseAVX < 1 || UseAVX > 2) 1294 ret_value = false; 1295 break; 1296 case Op_StrIndexOf: 1297 if (!UseSSE42Intrinsics) 1298 ret_value = false; 1299 break; 1300 case Op_StrIndexOfChar: 1301 if (!UseSSE42Intrinsics) 1302 ret_value = false; 1303 break; 1304 case Op_OnSpinWait: 1305 if (VM_Version::supports_on_spin_wait() == false) 1306 ret_value = false; 1307 break; 1308 } 1309 1310 return ret_value; // Per default match rules are supported. 1311 } 1312 1313 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt, int op_arity) { 1314 // identify extra cases that we might want to provide match rules for 1315 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen 1316 bool ret_value = match_rule_supported(opcode); 1317 if (ret_value) { 1318 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1319 if (!vector_size_supported(bt, vlen)) { 1320 ret_value = false; 1321 } else if (size_in_bits > 256 && UseAVX <= 2) { 1322 // Only AVX512 supports 512-bit vectors 1323 ret_value = false; 1324 } else if (UseAVX == 0 && size_in_bits > 128) { 1325 // Only AVX supports 256-bit vectors 1326 ret_value = false; 1327 } else if (is_subword_type(bt) && size_in_bits == 512 && VM_Version::supports_avx512bw() == false) { 1328 // Byte and Short types are not supported in AVX512 if AVX512BW is not true. 
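// (For example, a 64-element byte vector is 64 * 1 * 8 = 512 bits and a
// 32-element short vector is 32 * 2 * 8 = 512 bits; both therefore need
// avx512bw on top of plain AVX-512.)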
1329 ret_value = false; 1330 } else { 1331 switch (opcode) { 1332 case Op_AbsV: 1333 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } 1334 else if (bt == T_LONG && UseAVX <= 2) { ret_value = false; } // Implementation limitation 1335 break; 1336 case Op_AddVB: 1337 case Op_SubVB: 1338 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) 1339 ret_value = false; 1340 break; 1341 case Op_MaxV: 1342 case Op_MinV: 1343 if ((UseSSE < 4 && (bt == T_BYTE || bt == T_INT)) || (UseAVX < 1 && bt == T_LONG)) 1344 ret_value = false; 1345 break; 1346 case Op_LShiftVI: 1347 case Op_RShiftVI: 1348 case Op_URShiftVI: 1349 if (op_arity == 2 && UseAVX <= 1) 1350 ret_value = false; 1351 break; 1352 case Op_LShiftVL: 1353 case Op_RShiftVL: 1354 case Op_URShiftVL: 1355 if (op_arity == 2 && UseAVX <= 1) 1356 ret_value = false; 1357 break; 1358 case Op_URShiftVS: 1359 case Op_RShiftVS: 1360 case Op_LShiftVS: 1361 case Op_MulVS: 1362 case Op_AddVS: 1363 case Op_SubVS: 1364 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) 1365 ret_value = false; 1366 break; 1367 case Op_CMoveVF: 1368 if (vlen != 8) 1369 ret_value = false; 1370 break; 1371 case Op_CMoveVD: 1372 if (vlen != 4) 1373 ret_value = false; 1374 break; 1375 case Op_VectorMaskCmp: 1376 if (UseAVX <= 0) { ret_value = false; } 1377 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } 1378 break; 1379 case Op_VectorBlend: 1380 if (UseSSE <= 3 && UseAVX == 0) { ret_value = false; } 1381 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } 1382 break; 1383 case Op_VectorTest: 1384 if (UseAVX <= 0) { ret_value = false; } 1385 else if (size_in_bits != 128 && size_in_bits != 256) { ret_value = false; } // Implementation limitation 1386 break; 1387 case Op_VectorLoadMask: 1388 if (UseSSE <= 3) { ret_value = false; } 1389 else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation 1390 break; 1391 case Op_VectorStoreMask: 1392 if (UseAVX < 2) { ret_value = false; } // Implementation limitation 1393 else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation 1394 else if (size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; } // Implementation limitation 1395 break; 1396 case Op_VectorCastB2X: 1397 if (UseAVX <= 0) { ret_value = false; } 1398 else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } 1399 break; 1400 case Op_VectorCastS2X: 1401 if (UseAVX <= 0) { ret_value = false; } 1402 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } 1403 else if (is_integral_type(bt) && vlen * type2aelembytes(T_SHORT) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; } 1404 break; 1405 case Op_VectorCastI2X: 1406 if (UseAVX <= 0) { ret_value = false; } 1407 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } 1408 else if (is_integral_type(bt) && vlen * type2aelembytes(T_INT) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; } 1409 break; 1410 case Op_VectorCastL2X: 1411 if (UseAVX <= 0) { ret_value = false; } 1412 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; } 1413 else if (is_integral_type(bt) && vlen * type2aelembytes(T_LONG) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; } 1414 else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { ret_value = false; } 1415 break; 1416 case Op_VectorCastF2X: 1417 // 
Casts from FP to integral types require special fixup logic not easily 1418 // implementable with vectors. 1419 if (UseAVX <= 0) { ret_value = false; } 1420 else if (bt != T_DOUBLE) { ret_value = false; } // Implementation limitation 1421 break; 1422 case Op_VectorCastD2X: 1423 // Casts from FP to integral types require special fixup logic not easily 1424 // implementable with vectors. 1425 if (UseAVX <= 0) { ret_value = false; } 1426 else if (bt != T_FLOAT) { ret_value = false; } // Implementation limitation 1427 break; 1428 case Op_VectorReinterpret: 1429 if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } 1430 break; 1431 default: 1432 break; 1433 } 1434 } 1435 } 1436 if (ret_value) { 1437 assert(is_java_primitive(bt) && (vlen > 0) && is_power_of_2(vlen) && 1438 vector_size_supported(bt, vlen), "must be supported"); 1439 } 1440 1441 return ret_value; // Per default match rules are supported. 1442 } 1443 1444 const bool Matcher::has_predicated_vectors(void) { 1445 bool ret_value = false; 1446 if (UseAVX > 2) { 1447 ret_value = VM_Version::supports_avx512vl(); 1448 } 1449 1450 return ret_value; 1451 } 1452 1453 const int Matcher::float_pressure(int default_pressure_threshold) { 1454 int float_pressure_threshold = default_pressure_threshold; 1455 #ifdef _LP64 1456 if (UseAVX > 2) { 1457 // Increase pressure threshold on machines with AVX3 which have 1458 // 2x more XMM registers. 1459 float_pressure_threshold = default_pressure_threshold * 2; 1460 } 1461 #endif 1462 return float_pressure_threshold; 1463 } 1464 1465 // Max vector size in bytes. 0 if not supported. 1466 const int Matcher::vector_width_in_bytes(BasicType bt) { 1467 assert(is_java_primitive(bt), "only primitive type vectors"); 1468 if (UseSSE < 2) return 0; 1469 // SSE2 supports 128bit vectors for all types. 1470 // AVX2 supports 256bit vectors for all types. 1471 // AVX512/EVEX supports 512bit vectors for all types. 1472 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 1473 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 1474 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 1475 size = (UseAVX > 2) ? 64 : 32; 1476 // Use flag to limit vector size. 1477 size = MIN2(size,(int)MaxVectorSize); 1478 // Minimum 2 values in vector (or 4 for bytes). 1479 switch (bt) { 1480 case T_DOUBLE: 1481 case T_LONG: 1482 if (size < 16) return 0; 1483 break; 1484 case T_FLOAT: 1485 case T_INT: 1486 if (size < 8) return 0; 1487 break; 1488 case T_BOOLEAN: 1489 if (size < 4) return 0; 1490 break; 1491 case T_CHAR: 1492 if (size < 4) return 0; 1493 break; 1494 case T_BYTE: 1495 if (size < 4) return 0; 1496 break; 1497 case T_SHORT: 1498 if (size < 4) return 0; 1499 break; 1500 default: 1501 ShouldNotReachHere(); 1502 } 1503 return size; 1504 } 1505 1506 // Limits on vector size (number of elements) loaded into vector. 1507 const int Matcher::max_vector_size(const BasicType bt) { 1508 return vector_width_in_bytes(bt)/type2aelembytes(bt); 1509 } 1510 const int Matcher::min_vector_size(const BasicType bt) { 1511 int max_size = max_vector_size(bt); 1512 // Min size which can be loaded into vector is 4 bytes. 1513 int size = (type2aelembytes(bt) == 1) ?
4 : 2; 1514 return MIN2(size,max_size); 1515 } 1516 1517 // Vector ideal reg corresponding to specified size in bytes 1518 const uint Matcher::vector_ideal_reg(int size) { 1519 assert(MaxVectorSize >= size, ""); 1520 switch(size) { 1521 case 4: return Op_VecS; 1522 case 8: return Op_VecD; 1523 case 16: return Op_VecX; 1524 case 32: return Op_VecY; 1525 case 64: return Op_VecZ; 1526 } 1527 ShouldNotReachHere(); 1528 return 0; 1529 } 1530 1531 // Only lowest bits of xmm reg are used for vector shift count. 1532 const uint Matcher::vector_shift_count_ideal_reg(int size) { 1533 return Op_VecS; 1534 } 1535 1536 // x86 supports misaligned vector store/load. 1537 const bool Matcher::misaligned_vectors_ok() { 1538 return !AlignVector; // can be changed by flag 1539 } 1540 1541 // x86 AES instructions are compatible with SunJCE expanded 1542 // keys, hence we do not need to pass the original key to stubs 1543 const bool Matcher::pass_original_key_for_aes() { 1544 return false; 1545 } 1546 1547 1548 const bool Matcher::convi2l_type_required = true; 1549 1550 // Check for shift by small constant as well 1551 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 1552 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 1553 shift->in(2)->get_int() <= 3 && 1554 // Are there other uses besides address expressions? 1555 !matcher->is_visited(shift)) { 1556 address_visited.set(shift->_idx); // Flag as address_visited 1557 mstack.push(shift->in(2), Matcher::Visit); 1558 Node *conv = shift->in(1); 1559 #ifdef _LP64 1560 // Allow Matcher to match the rule which bypasses 1561 // the ConvI2L operation for an array index on LP64 1562 // if the index value is positive. 1563 if (conv->Opcode() == Op_ConvI2L && 1564 conv->as_Type()->type()->is_long()->_lo >= 0 && 1565 // Are there other uses besides address expressions? 1566 !matcher->is_visited(conv)) { 1567 address_visited.set(conv->_idx); // Flag as address_visited 1568 mstack.push(conv->in(1), Matcher::Pre_Visit); 1569 } else 1570 #endif 1571 mstack.push(conv, Matcher::Pre_Visit); 1572 return true; 1573 } 1574 return false; 1575 } 1576 1577 // Should the Matcher clone shifts on addressing modes, expecting them 1578 // to be subsumed into complex addressing expressions or compute them 1579 // into registers? 1580 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 1581 Node *off = m->in(AddPNode::Offset); 1582 if (off->is_Con()) { 1583 address_visited.test_set(m->_idx); // Flag as address_visited 1584 Node *adr = m->in(AddPNode::Address); 1585 1586 // Intel can handle 2 adds in addressing mode 1587 // AtomicAdd is not an addressing expression. 1588 // Cheap to find it by looking for screwy base. 1589 if (adr->is_AddP() && 1590 !adr->in(AddPNode::Base)->is_top() && 1591 // Are there other uses besides address expressions?
1592 !is_visited(adr)) { 1593 address_visited.set(adr->_idx); // Flag as address_visited 1594 Node *shift = adr->in(AddPNode::Offset); 1595 if (!clone_shift(shift, this, mstack, address_visited)) { 1596 mstack.push(shift, Pre_Visit); 1597 } 1598 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 1599 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 1600 } else { 1601 mstack.push(adr, Pre_Visit); 1602 } 1603 1604 // Clone X+offset as it also folds into most addressing expressions 1605 mstack.push(off, Visit); 1606 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1607 return true; 1608 } else if (clone_shift(off, this, mstack, address_visited)) { 1609 address_visited.test_set(m->_idx); // Flag as address_visited 1610 mstack.push(m->in(AddPNode::Address), Pre_Visit); 1611 mstack.push(m->in(AddPNode::Base), Pre_Visit); 1612 return true; 1613 } 1614 return false; 1615 } 1616 1617 void Compile::reshape_address(AddPNode* addp) { 1618 } 1619 1620 // Helper methods for MachSpillCopyNode::implementation(). 1621 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 1622 int src_hi, int dst_hi, uint ireg, outputStream* st) { 1623 // In 64-bit VM size calculation is very complex. Emitting instructions 1624 // into scratch buffer is used to get size in 64-bit VM. 1625 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1626 assert(ireg == Op_VecS || // 32bit vector 1627 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 1628 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 1629 "no non-adjacent vector moves" ); 1630 if (cbuf) { 1631 MacroAssembler _masm(cbuf); 1632 int offset = __ offset(); 1633 switch (ireg) { 1634 case Op_VecS: // copy whole register 1635 case Op_VecD: 1636 case Op_VecX: 1637 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1638 break; 1639 case Op_VecY: 1640 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 1641 break; 1642 case Op_VecZ: 1643 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 1644 break; 1645 default: 1646 ShouldNotReachHere(); 1647 } 1648 int size = __ offset() - offset; 1649 #ifdef ASSERT 1650 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1651 assert(!do_size || size == 4, "incorrect size calculation"); 1652 #endif 1653 return size; 1654 #ifndef PRODUCT 1655 } else if (!do_size) { 1656 switch (ireg) { 1657 case Op_VecS: 1658 case Op_VecD: 1659 case Op_VecX: 1660 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1661 break; 1662 case Op_VecY: 1663 case Op_VecZ: 1664 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 1665 break; 1666 default: 1667 ShouldNotReachHere(); 1668 } 1669 #endif 1670 } 1671 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 1672 return (UseAVX > 2) ? 6 : 4; 1673 } 1674 1675 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 1676 int stack_offset, int reg, uint ireg, outputStream* st) { 1677 // In 64-bit VM size calculation is very complex. Emitting instructions 1678 // into scratch buffer is used to get size in 64-bit VM.
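// The move width tracks the ideal register: Op_VecS spills with movdl
// (32 bits), Op_VecD with movq (64 bits), Op_VecX with movdqu (128 bits),
// Op_VecY with vmovdqu (256 bits), and Op_VecZ with evmovdquq (512 bits).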
1679 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1680 if (cbuf) { 1681 MacroAssembler _masm(cbuf); 1682 int offset = __ offset(); 1683 if (is_load) { 1684 switch (ireg) { 1685 case Op_VecS: 1686 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1687 break; 1688 case Op_VecD: 1689 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1690 break; 1691 case Op_VecX: 1692 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1693 break; 1694 case Op_VecY: 1695 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1696 break; 1697 case Op_VecZ: 1698 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1699 break; 1700 default: 1701 ShouldNotReachHere(); 1702 } 1703 } else { // store 1704 switch (ireg) { 1705 case Op_VecS: 1706 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1707 break; 1708 case Op_VecD: 1709 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1710 break; 1711 case Op_VecX: 1712 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1713 break; 1714 case Op_VecY: 1715 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1716 break; 1717 case Op_VecZ: 1718 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1719 break; 1720 default: 1721 ShouldNotReachHere(); 1722 } 1723 } 1724 int size = __ offset() - offset; 1725 #ifdef ASSERT 1726 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1727 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
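// Example of the expected size: no displacement costs the base 5 bytes,
// a disp8 offset (stack_offset < 0x80) adds 1 byte, and a disp32 adds 4,
// or 6 under the EVEX encoding when UseAVX > 2.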
1728 assert(!do_size || size == (5+offset_size), "incorrect size calculation"); 1729 #endif 1730 return size; 1731 #ifndef PRODUCT 1732 } else if (!do_size) { 1733 if (is_load) { 1734 switch (ireg) { 1735 case Op_VecS: 1736 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1737 break; 1738 case Op_VecD: 1739 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1740 break; 1741 case Op_VecX: 1742 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1743 break; 1744 case Op_VecY: 1745 case Op_VecZ: 1746 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 1747 break; 1748 default: 1749 ShouldNotReachHere(); 1750 } 1751 } else { // store 1752 switch (ireg) { 1753 case Op_VecS: 1754 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1755 break; 1756 case Op_VecD: 1757 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1758 break; 1759 case Op_VecX: 1760 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1761 break; 1762 case Op_VecY: 1763 case Op_VecZ: 1764 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 1765 break; 1766 default: 1767 ShouldNotReachHere(); 1768 } 1769 } 1770 #endif 1771 } 1772 bool is_single_byte = false; 1773 int vec_len = 0; 1774 if ((UseAVX > 2) && (stack_offset != 0)) { 1775 int tuple_type = Assembler::EVEX_FVM; 1776 int input_size = Assembler::EVEX_32bit; 1777 switch (ireg) { 1778 case Op_VecS: 1779 tuple_type = Assembler::EVEX_T1S; 1780 break; 1781 case Op_VecD: 1782 tuple_type = Assembler::EVEX_T1S; 1783 input_size = Assembler::EVEX_64bit; 1784 break; 1785 case Op_VecX: 1786 break; 1787 case Op_VecY: 1788 vec_len = 1; 1789 break; 1790 case Op_VecZ: 1791 vec_len = 2; 1792 break; 1793 } 1794 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); 1795 } 1796 int offset_size = 0; 1797 int size = 5; 1798 if (UseAVX > 2 ) { 1799 if (VM_Version::supports_avx512novl() && (vec_len == 2)) { 1800 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1801 size += 2; // Need an additional two bytes for EVEX encoding 1802 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { 1803 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1804 } else { 1805 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); 1806 size += 2; // Need an additional two bytes for EVEX encoding 1807 } 1808 } else { 1809 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); 1810 } 1811 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 1812 return size+offset_size; 1813 } 1814 1815 static inline jint replicate4_imm(int con, int width) { 1816 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 1817 assert(width == 1 || width == 2, "only byte or short types here"); 1818 int bit_width = width * 8; 1819 jint val = con; 1820 val &= (1 << bit_width) - 1; // mask off sign bits 1821 while(bit_width < 32) { 1822 val |= (val << bit_width); 1823 bit_width <<= 1; 1824 } 1825 return val; 1826 } 1827 1828 static inline jlong replicate8_imm(int con, int width) { 1829 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
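// For example, replicate4_imm(0x5A, 1) yields 0x5A5A5A5A, while
// replicate8_imm(0x1234, 2) doubles the pattern until 64 bits are full:
// 0x1234 -> 0x12341234 -> 0x1234123412341234.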
1830 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1831 int bit_width = width * 8; 1832 jlong val = con; 1833 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1834 while(bit_width < 64) { 1835 val |= (val << bit_width); 1836 bit_width <<= 1; 1837 } 1838 return val; 1839 } 1840 1841 #ifndef PRODUCT 1842 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1843 st->print("nop \t# %d bytes pad for loops and calls", _count); 1844 } 1845 #endif 1846 1847 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1848 MacroAssembler _masm(&cbuf); 1849 __ nop(_count); 1850 } 1851 1852 uint MachNopNode::size(PhaseRegAlloc*) const { 1853 return _count; 1854 } 1855 1856 #ifndef PRODUCT 1857 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1858 st->print("# breakpoint"); 1859 } 1860 #endif 1861 1862 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1863 MacroAssembler _masm(&cbuf); 1864 __ int3(); 1865 } 1866 1867 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1868 return MachNode::size(ra_); 1869 } 1870 1871 %} 1872 1873 encode %{ 1874 1875 enc_class call_epilog %{ 1876 if (VerifyStackAtCalls) { 1877 // Check that stack depth is unchanged: find majik cookie on stack 1878 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1879 MacroAssembler _masm(&cbuf); 1880 Label L; 1881 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1882 __ jccb(Assembler::equal, L); 1883 // Die if stack mismatch 1884 __ int3(); 1885 __ bind(L); 1886 } 1887 %} 1888 1889 %} 1890 1891 1892 //----------OPERANDS----------------------------------------------------------- 1893 // Operand definitions must precede instruction definitions for correct parsing 1894 // in the ADLC because operands constitute user defined types which are used in 1895 // instruction definitions. 1896 1897 // This one generically applies only for evex, so only one version 1898 operand vecZ() %{ 1899 constraint(ALLOC_IN_RC(vectorz_reg)); 1900 match(VecZ); 1901 1902 format %{ %} 1903 interface(REG_INTER); 1904 %} 1905 1906 operand rxmm0() %{ 1907 constraint(ALLOC_IN_RC(xmm0_reg)); match(VecX); 1908 predicate((UseSSE > 0) && (UseAVX == 0)); format %{ %} interface(REG_INTER); 1909 %} 1910 1911 // Comparison Code for FP conditional move 1912 operand cmpOp_vcmppd() %{ 1913 match(Bool); 1914 1915 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 1916 n->as_Bool()->_test._test != BoolTest::no_overflow); 1917 format %{ "" %} 1918 interface(COND_INTER) %{ 1919 equal (0x0, "eq"); 1920 less (0x1, "lt"); 1921 less_equal (0x2, "le"); 1922 not_equal (0xC, "ne"); 1923 greater_equal(0xD, "ge"); 1924 greater (0xE, "gt"); 1925 // TODO: adlc cannot compile this operand without the next two lines, failing with error: 1926 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 1927 // equal' for overflow.
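// The two entries below exist only to satisfy the ADLC grammar; the
// predicate above already rejects overflow/no_overflow tests, so vcmppd
// never actually sees these encodings.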
1928 overflow (0x20, "o"); // not really supported by the instruction 1929 no_overflow (0x21, "no"); // not really supported by the instruction 1930 %} 1931 %} 1932 1933 1934 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1935 1936 // ============================================================================ 1937 1938 instruct ShouldNotReachHere() %{ 1939 match(Halt); 1940 format %{ "ud2\t# ShouldNotReachHere" %} 1941 ins_encode %{ 1942 __ ud2(); 1943 %} 1944 ins_pipe(pipe_slow); 1945 %} 1946 1947 // =================================EVEX special=============================== 1948 1949 instruct setMask(rRegI dst, rRegI src) %{ 1950 predicate(Matcher::has_predicated_vectors()); 1951 match(Set dst (SetVectMaskI src)); 1952 effect(TEMP dst); 1953 format %{ "setvectmask $dst, $src" %} 1954 ins_encode %{ 1955 __ setvectmask($dst$$Register, $src$$Register); 1956 %} 1957 ins_pipe(pipe_slow); 1958 %} 1959 1960 // ============================================================================ 1961 1962 instruct addF_reg(regF dst, regF src) %{ 1963 predicate((UseSSE>=1) && (UseAVX == 0)); 1964 match(Set dst (AddF dst src)); 1965 1966 format %{ "addss $dst, $src" %} 1967 ins_cost(150); 1968 ins_encode %{ 1969 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1970 %} 1971 ins_pipe(pipe_slow); 1972 %} 1973 1974 instruct addF_mem(regF dst, memory src) %{ 1975 predicate((UseSSE>=1) && (UseAVX == 0)); 1976 match(Set dst (AddF dst (LoadF src))); 1977 1978 format %{ "addss $dst, $src" %} 1979 ins_cost(150); 1980 ins_encode %{ 1981 __ addss($dst$$XMMRegister, $src$$Address); 1982 %} 1983 ins_pipe(pipe_slow); 1984 %} 1985 1986 instruct addF_imm(regF dst, immF con) %{ 1987 predicate((UseSSE>=1) && (UseAVX == 0)); 1988 match(Set dst (AddF dst con)); 1989 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1990 ins_cost(150); 1991 ins_encode %{ 1992 __ addss($dst$$XMMRegister, $constantaddress($con)); 1993 %} 1994 ins_pipe(pipe_slow); 1995 %} 1996 1997 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1998 predicate(UseAVX > 0); 1999 match(Set dst (AddF src1 src2)); 2000 2001 format %{ "vaddss $dst, $src1, $src2" %} 2002 ins_cost(150); 2003 ins_encode %{ 2004 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2005 %} 2006 ins_pipe(pipe_slow); 2007 %} 2008 2009 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2010 predicate(UseAVX > 0); 2011 match(Set dst (AddF src1 (LoadF src2))); 2012 2013 format %{ "vaddss $dst, $src1, $src2" %} 2014 ins_cost(150); 2015 ins_encode %{ 2016 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2017 %} 2018 ins_pipe(pipe_slow); 2019 %} 2020 2021 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2022 predicate(UseAVX > 0); 2023 match(Set dst (AddF src con)); 2024 2025 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2026 ins_cost(150); 2027 ins_encode %{ 2028 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2029 %} 2030 ins_pipe(pipe_slow); 2031 %} 2032 2033 instruct addD_reg(regD dst, regD src) %{ 2034 predicate((UseSSE>=2) && (UseAVX == 0)); 2035 match(Set dst (AddD dst src)); 2036 2037 format %{ "addsd $dst, $src" %} 2038 ins_cost(150); 2039 ins_encode %{ 2040 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2041 %} 2042 ins_pipe(pipe_slow); 2043 %} 2044 2045 instruct addD_mem(regD dst, memory src) %{ 2046 predicate((UseSSE>=2) && (UseAVX == 0)); 2047 match(Set dst (AddD dst (LoadD src))); 2048 2049 
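// The LoadD is folded directly into addsd's memory operand, so no
// separate load instruction is emitted for this pattern.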
format %{ "addsd $dst, $src" %} 2050 ins_cost(150); 2051 ins_encode %{ 2052 __ addsd($dst$$XMMRegister, $src$$Address); 2053 %} 2054 ins_pipe(pipe_slow); 2055 %} 2056 2057 instruct addD_imm(regD dst, immD con) %{ 2058 predicate((UseSSE>=2) && (UseAVX == 0)); 2059 match(Set dst (AddD dst con)); 2060 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2061 ins_cost(150); 2062 ins_encode %{ 2063 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2064 %} 2065 ins_pipe(pipe_slow); 2066 %} 2067 2068 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2069 predicate(UseAVX > 0); 2070 match(Set dst (AddD src1 src2)); 2071 2072 format %{ "vaddsd $dst, $src1, $src2" %} 2073 ins_cost(150); 2074 ins_encode %{ 2075 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2076 %} 2077 ins_pipe(pipe_slow); 2078 %} 2079 2080 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2081 predicate(UseAVX > 0); 2082 match(Set dst (AddD src1 (LoadD src2))); 2083 2084 format %{ "vaddsd $dst, $src1, $src2" %} 2085 ins_cost(150); 2086 ins_encode %{ 2087 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2088 %} 2089 ins_pipe(pipe_slow); 2090 %} 2091 2092 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2093 predicate(UseAVX > 0); 2094 match(Set dst (AddD src con)); 2095 2096 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2097 ins_cost(150); 2098 ins_encode %{ 2099 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2100 %} 2101 ins_pipe(pipe_slow); 2102 %} 2103 2104 instruct subF_reg(regF dst, regF src) %{ 2105 predicate((UseSSE>=1) && (UseAVX == 0)); 2106 match(Set dst (SubF dst src)); 2107 2108 format %{ "subss $dst, $src" %} 2109 ins_cost(150); 2110 ins_encode %{ 2111 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2112 %} 2113 ins_pipe(pipe_slow); 2114 %} 2115 2116 instruct subF_mem(regF dst, memory src) %{ 2117 predicate((UseSSE>=1) && (UseAVX == 0)); 2118 match(Set dst (SubF dst (LoadF src))); 2119 2120 format %{ "subss $dst, $src" %} 2121 ins_cost(150); 2122 ins_encode %{ 2123 __ subss($dst$$XMMRegister, $src$$Address); 2124 %} 2125 ins_pipe(pipe_slow); 2126 %} 2127 2128 instruct subF_imm(regF dst, immF con) %{ 2129 predicate((UseSSE>=1) && (UseAVX == 0)); 2130 match(Set dst (SubF dst con)); 2131 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2132 ins_cost(150); 2133 ins_encode %{ 2134 __ subss($dst$$XMMRegister, $constantaddress($con)); 2135 %} 2136 ins_pipe(pipe_slow); 2137 %} 2138 2139 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2140 predicate(UseAVX > 0); 2141 match(Set dst (SubF src1 src2)); 2142 2143 format %{ "vsubss $dst, $src1, $src2" %} 2144 ins_cost(150); 2145 ins_encode %{ 2146 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2147 %} 2148 ins_pipe(pipe_slow); 2149 %} 2150 2151 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2152 predicate(UseAVX > 0); 2153 match(Set dst (SubF src1 (LoadF src2))); 2154 2155 format %{ "vsubss $dst, $src1, $src2" %} 2156 ins_cost(150); 2157 ins_encode %{ 2158 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2159 %} 2160 ins_pipe(pipe_slow); 2161 %} 2162 2163 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2164 predicate(UseAVX > 0); 2165 match(Set dst (SubF src con)); 2166 2167 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2168 ins_cost(150); 2169 ins_encode %{ 2170 
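// Three-operand AVX form: computes $src - $con into $dst, leaving $src
// intact (the two-operand SSE subss forms above must overwrite $dst).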
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2171 %} 2172 ins_pipe(pipe_slow); 2173 %} 2174 2175 instruct subD_reg(regD dst, regD src) %{ 2176 predicate((UseSSE>=2) && (UseAVX == 0)); 2177 match(Set dst (SubD dst src)); 2178 2179 format %{ "subsd $dst, $src" %} 2180 ins_cost(150); 2181 ins_encode %{ 2182 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2183 %} 2184 ins_pipe(pipe_slow); 2185 %} 2186 2187 instruct subD_mem(regD dst, memory src) %{ 2188 predicate((UseSSE>=2) && (UseAVX == 0)); 2189 match(Set dst (SubD dst (LoadD src))); 2190 2191 format %{ "subsd $dst, $src" %} 2192 ins_cost(150); 2193 ins_encode %{ 2194 __ subsd($dst$$XMMRegister, $src$$Address); 2195 %} 2196 ins_pipe(pipe_slow); 2197 %} 2198 2199 instruct subD_imm(regD dst, immD con) %{ 2200 predicate((UseSSE>=2) && (UseAVX == 0)); 2201 match(Set dst (SubD dst con)); 2202 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2203 ins_cost(150); 2204 ins_encode %{ 2205 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2206 %} 2207 ins_pipe(pipe_slow); 2208 %} 2209 2210 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2211 predicate(UseAVX > 0); 2212 match(Set dst (SubD src1 src2)); 2213 2214 format %{ "vsubsd $dst, $src1, $src2" %} 2215 ins_cost(150); 2216 ins_encode %{ 2217 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2218 %} 2219 ins_pipe(pipe_slow); 2220 %} 2221 2222 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2223 predicate(UseAVX > 0); 2224 match(Set dst (SubD src1 (LoadD src2))); 2225 2226 format %{ "vsubsd $dst, $src1, $src2" %} 2227 ins_cost(150); 2228 ins_encode %{ 2229 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2230 %} 2231 ins_pipe(pipe_slow); 2232 %} 2233 2234 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2235 predicate(UseAVX > 0); 2236 match(Set dst (SubD src con)); 2237 2238 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2239 ins_cost(150); 2240 ins_encode %{ 2241 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2242 %} 2243 ins_pipe(pipe_slow); 2244 %} 2245 2246 instruct mulF_reg(regF dst, regF src) %{ 2247 predicate((UseSSE>=1) && (UseAVX == 0)); 2248 match(Set dst (MulF dst src)); 2249 2250 format %{ "mulss $dst, $src" %} 2251 ins_cost(150); 2252 ins_encode %{ 2253 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2254 %} 2255 ins_pipe(pipe_slow); 2256 %} 2257 2258 instruct mulF_mem(regF dst, memory src) %{ 2259 predicate((UseSSE>=1) && (UseAVX == 0)); 2260 match(Set dst (MulF dst (LoadF src))); 2261 2262 format %{ "mulss $dst, $src" %} 2263 ins_cost(150); 2264 ins_encode %{ 2265 __ mulss($dst$$XMMRegister, $src$$Address); 2266 %} 2267 ins_pipe(pipe_slow); 2268 %} 2269 2270 instruct mulF_imm(regF dst, immF con) %{ 2271 predicate((UseSSE>=1) && (UseAVX == 0)); 2272 match(Set dst (MulF dst con)); 2273 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2274 ins_cost(150); 2275 ins_encode %{ 2276 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2277 %} 2278 ins_pipe(pipe_slow); 2279 %} 2280 2281 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2282 predicate(UseAVX > 0); 2283 match(Set dst (MulF src1 src2)); 2284 2285 format %{ "vmulss $dst, $src1, $src2" %} 2286 ins_cost(150); 2287 ins_encode %{ 2288 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2289 %} 2290 ins_pipe(pipe_slow); 2291 %} 2292 2293 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2294 predicate(UseAVX > 0); 2295 match(Set dst (MulF src1 (LoadF src2))); 2296 2297 format %{ "vmulss $dst, $src1, $src2" %} 2298 ins_cost(150); 2299 ins_encode %{ 2300 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2301 %} 2302 ins_pipe(pipe_slow); 2303 %} 2304 2305 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2306 predicate(UseAVX > 0); 2307 match(Set dst (MulF src con)); 2308 2309 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2310 ins_cost(150); 2311 ins_encode %{ 2312 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2313 %} 2314 ins_pipe(pipe_slow); 2315 %} 2316 2317 instruct mulD_reg(regD dst, regD src) %{ 2318 predicate((UseSSE>=2) && (UseAVX == 0)); 2319 match(Set dst (MulD dst src)); 2320 2321 format %{ "mulsd $dst, $src" %} 2322 ins_cost(150); 2323 ins_encode %{ 2324 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2325 %} 2326 ins_pipe(pipe_slow); 2327 %} 2328 2329 instruct mulD_mem(regD dst, memory src) %{ 2330 predicate((UseSSE>=2) && (UseAVX == 0)); 2331 match(Set dst (MulD dst (LoadD src))); 2332 2333 format %{ "mulsd $dst, $src" %} 2334 ins_cost(150); 2335 ins_encode %{ 2336 __ mulsd($dst$$XMMRegister, $src$$Address); 2337 %} 2338 ins_pipe(pipe_slow); 2339 %} 2340 2341 instruct mulD_imm(regD dst, immD con) %{ 2342 predicate((UseSSE>=2) && (UseAVX == 0)); 2343 match(Set dst (MulD dst con)); 2344 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2345 ins_cost(150); 2346 ins_encode %{ 2347 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2348 %} 2349 ins_pipe(pipe_slow); 2350 %} 2351 2352 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2353 predicate(UseAVX > 0); 2354 match(Set dst (MulD src1 src2)); 2355 2356 format %{ "vmulsd $dst, $src1, $src2" %} 2357 ins_cost(150); 2358 ins_encode %{ 2359 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2360 %} 2361 ins_pipe(pipe_slow); 2362 %} 2363 2364 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2365 predicate(UseAVX > 0); 2366 match(Set dst (MulD src1 (LoadD src2))); 2367 2368 format %{ "vmulsd $dst, $src1, $src2" %} 2369 ins_cost(150); 2370 ins_encode %{ 2371 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2372 %} 2373 ins_pipe(pipe_slow); 2374 %} 2375 2376 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2377 predicate(UseAVX > 0); 2378 match(Set dst (MulD src con)); 2379 2380 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2381 ins_cost(150); 2382 ins_encode %{ 2383 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2384 %} 2385 ins_pipe(pipe_slow); 2386 %} 2387 2388 instruct divF_reg(regF dst, regF src) %{ 2389 predicate((UseSSE>=1) && (UseAVX == 0)); 2390 match(Set dst (DivF dst src)); 2391 2392 format %{ "divss $dst, $src" %} 2393 ins_cost(150); 2394 ins_encode %{ 2395 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2396 %} 2397 ins_pipe(pipe_slow); 2398 %} 2399 2400 instruct divF_mem(regF dst, memory src) %{ 2401 predicate((UseSSE>=1) && (UseAVX == 0)); 2402 match(Set dst (DivF dst (LoadF src))); 2403 2404 format %{ "divss $dst, $src" %} 2405 ins_cost(150); 2406 ins_encode %{ 2407 __ divss($dst$$XMMRegister, $src$$Address); 2408 %} 2409 ins_pipe(pipe_slow); 2410 %} 2411 2412 instruct divF_imm(regF dst, immF con) %{ 2413 predicate((UseSSE>=1) && (UseAVX == 0)); 2414 match(Set dst (DivF dst con)); 2415 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2416 ins_cost(150); 2417 ins_encode %{ 2418 __ divss($dst$$XMMRegister, $constantaddress($con)); 2419 %} 2420 ins_pipe(pipe_slow); 2421 %} 2422 2423 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2424 predicate(UseAVX > 0); 2425 match(Set dst (DivF src1 src2)); 2426 2427 format %{ "vdivss $dst, $src1, $src2" %} 2428 ins_cost(150); 2429 ins_encode %{ 2430 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2431 %} 2432 ins_pipe(pipe_slow); 2433 %} 2434 2435 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2436 predicate(UseAVX > 0); 2437 match(Set dst (DivF src1 (LoadF src2))); 2438 2439 format %{ "vdivss $dst, $src1, $src2" %} 2440 ins_cost(150); 2441 ins_encode %{ 2442 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2443 %} 2444 ins_pipe(pipe_slow); 2445 %} 2446 2447 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2448 predicate(UseAVX > 0); 2449 match(Set dst (DivF src con)); 2450 2451 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2452 ins_cost(150); 2453 ins_encode %{ 2454 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2455 %} 2456 ins_pipe(pipe_slow); 2457 %} 2458 2459 instruct divD_reg(regD dst, regD src) %{ 2460 predicate((UseSSE>=2) && (UseAVX == 0)); 2461 match(Set dst (DivD dst src)); 2462 2463 format %{ "divsd $dst, $src" %} 2464 ins_cost(150); 2465 ins_encode %{ 2466 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2467 %} 2468 ins_pipe(pipe_slow); 2469 %} 2470 2471 instruct divD_mem(regD dst, memory src) %{ 2472 predicate((UseSSE>=2) && (UseAVX == 0)); 2473 match(Set dst (DivD dst (LoadD src))); 2474 2475 format %{ "divsd $dst, $src" %} 2476 ins_cost(150); 2477 ins_encode %{ 2478 __ divsd($dst$$XMMRegister, $src$$Address); 2479 %} 2480 ins_pipe(pipe_slow); 2481 %} 2482 2483 instruct divD_imm(regD dst, immD con) %{ 2484 predicate((UseSSE>=2) && (UseAVX == 0)); 2485 match(Set dst (DivD dst con)); 2486 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2487 ins_cost(150); 2488 ins_encode %{ 2489 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2490 %} 2491 ins_pipe(pipe_slow); 2492 %} 2493 2494 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2495 predicate(UseAVX > 0); 2496 match(Set dst (DivD src1 src2)); 2497 2498 format %{ "vdivsd $dst, $src1, $src2" %} 2499 ins_cost(150); 2500 ins_encode %{ 2501 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2502 %} 2503 ins_pipe(pipe_slow); 2504 %} 2505 2506 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2507 predicate(UseAVX > 0); 2508 match(Set dst (DivD src1 (LoadD src2))); 2509 2510 format %{ "vdivsd $dst, $src1, $src2" %} 2511 ins_cost(150); 2512 ins_encode %{ 2513 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2514 %} 2515 ins_pipe(pipe_slow); 2516 %} 2517 2518 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2519 predicate(UseAVX > 0); 2520 match(Set dst (DivD src con)); 2521 2522 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2523 ins_cost(150); 2524 ins_encode %{ 2525 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2526 %} 2527 ins_pipe(pipe_slow); 2528 %} 2529 2530 instruct absF_reg(regF dst) %{ 2531 predicate((UseSSE>=1) && (UseAVX == 0)); 2532 match(Set dst (AbsF dst)); 2533 ins_cost(150); 2534 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%} 2535 ins_encode %{ 2536 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 2537 %} 2538 ins_pipe(pipe_slow); 2539 %} 2540 2541 instruct absF_reg_reg(regF dst, regF src) %{ 2542 predicate(VM_Version::supports_avxonly()); 2543 match(Set dst (AbsF src)); 2544 ins_cost(150); 2545 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2546 ins_encode %{ 2547 int vector_len = 0; 2548 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2549 ExternalAddress(float_signmask()), vector_len); 2550 %} 2551 ins_pipe(pipe_slow); 2552 %} 2553 2554 #ifdef _LP64 2555 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2556 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2557 match(Set dst (AbsF src)); 2558 ins_cost(150); 2559 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2560 ins_encode %{ 2561 int vector_len = 0; 2562 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2563 ExternalAddress(float_signmask()), vector_len); 2564 %} 2565 ins_pipe(pipe_slow); 2566 %} 2567 2568 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ 2569 predicate(VM_Version::supports_avx512novl()); 2570 match(Set dst (AbsF src1)); 2571 effect(TEMP src2); 2572 ins_cost(150); 2573 format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} 2574 ins_encode %{ 2575 int vector_len = 0; 2576 __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2577 ExternalAddress(float_signmask()), vector_len); 2578 %} 2579 ins_pipe(pipe_slow); 2580 %} 2581 #else // _LP64 2582 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2583 predicate(UseAVX > 2); 2584 match(Set dst (AbsF src)); 2585 ins_cost(150); 2586 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2587 ins_encode %{ 2588 int vector_len = 0; 2589 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2590 ExternalAddress(float_signmask()), vector_len); 2591 %} 2592 ins_pipe(pipe_slow); 2593 %} 2594 #endif 2595 2596 instruct absD_reg(regD dst) %{ 2597 predicate((UseSSE>=2) && (UseAVX == 0)); 2598 match(Set dst (AbsD dst)); 2599 ins_cost(150); 2600 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2601 "# abs double by sign masking" %} 2602 ins_encode %{ 2603 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2604 %} 2605 ins_pipe(pipe_slow); 2606 %} 2607 2608 instruct absD_reg_reg(regD dst, regD src) %{ 2609 predicate(VM_Version::supports_avxonly()); 2610 match(Set dst (AbsD src)); 2611 ins_cost(150); 2612 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2613 "# abs double by sign masking" %} 2614 ins_encode %{ 2615 int vector_len = 0; 2616 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2617 ExternalAddress(double_signmask()), vector_len); 2618 %} 2619 ins_pipe(pipe_slow); 2620 %} 2621 2622 #ifdef _LP64 2623 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2624 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2625 match(Set dst (AbsD src)); 2626 ins_cost(150); 2627 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2628 "# abs double by sign masking" %} 2629 ins_encode %{ 2630 int vector_len = 0; 2631 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2632 ExternalAddress(double_signmask()), vector_len); 2633 %} 2634 ins_pipe(pipe_slow); 2635 %} 2636 2637 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ 2638 predicate(VM_Version::supports_avx512novl()); 2639 match(Set dst (AbsD src1)); 2640 effect(TEMP src2); 2641 ins_cost(150); 2642 format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double
by sign masking" %} 2643 ins_encode %{ 2644 int vector_len = 0; 2645 __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2646 ExternalAddress(double_signmask()), vector_len); 2647 %} 2648 ins_pipe(pipe_slow); 2649 %} 2650 #else // _LP64 2651 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2652 predicate(UseAVX > 2); 2653 match(Set dst (AbsD src)); 2654 ins_cost(150); 2655 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2656 "# abs double by sign masking" %} 2657 ins_encode %{ 2658 int vector_len = 0; 2659 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2660 ExternalAddress(double_signmask()), vector_len); 2661 %} 2662 ins_pipe(pipe_slow); 2663 %} 2664 #endif 2665 2666 instruct negI_rReg_2(rRegI dst, rFlagsReg cr) 2667 %{ 2668 match(Set dst (NegI dst)); 2669 effect(KILL cr); 2670 2671 format %{ "negl $dst\t# int" %} 2672 ins_encode %{ 2673 __ negl($dst$$Register); 2674 %} 2675 ins_pipe(ialu_reg); 2676 %} 2677 2678 instruct negL_rReg_2(rRegL dst, rFlagsReg cr) 2679 %{ 2680 match(Set dst (NegL dst)); 2681 effect(KILL cr); 2682 2683 format %{ "negq $dst\t# long" %} 2684 ins_encode %{ 2685 __ negq($dst$$Register); 2686 %} 2687 ins_pipe(ialu_reg); 2688 %} 2689 2690 instruct negF_reg(regF dst) %{ 2691 predicate((UseSSE>=1) && (UseAVX == 0)); 2692 match(Set dst (NegF dst)); 2693 ins_cost(150); 2694 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2695 ins_encode %{ 2696 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2697 %} 2698 ins_pipe(pipe_slow); 2699 %} 2700 2701 instruct negF_reg_reg(regF dst, regF src) %{ 2702 predicate(UseAVX > 0); 2703 match(Set dst (NegF src)); 2704 ins_cost(150); 2705 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2706 ins_encode %{ 2707 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2708 ExternalAddress(float_signflip())); 2709 %} 2710 ins_pipe(pipe_slow); 2711 %} 2712 2713 instruct negD_reg(regD dst) %{ 2714 predicate((UseSSE>=2) && (UseAVX == 0)); 2715 match(Set dst (NegD dst)); 2716 ins_cost(150); 2717 format %{ "xorpd $dst, [0x8000000000000000]\t" 2718 "# neg double by sign flipping" %} 2719 ins_encode %{ 2720 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2721 %} 2722 ins_pipe(pipe_slow); 2723 %} 2724 2725 instruct negD_reg_reg(regD dst, regD src) %{ 2726 predicate(UseAVX > 0); 2727 match(Set dst (NegD src)); 2728 ins_cost(150); 2729 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 2730 "# neg double by sign flipping" %} 2731 ins_encode %{ 2732 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2733 ExternalAddress(double_signflip())); 2734 %} 2735 ins_pipe(pipe_slow); 2736 %} 2737 2738 instruct sqrtF_reg(regF dst, regF src) %{ 2739 predicate(UseSSE>=1); 2740 match(Set dst (SqrtF src)); 2741 2742 format %{ "sqrtss $dst, $src" %} 2743 ins_cost(150); 2744 ins_encode %{ 2745 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 2746 %} 2747 ins_pipe(pipe_slow); 2748 %} 2749 2750 instruct sqrtF_mem(regF dst, memory src) %{ 2751 predicate(UseSSE>=1); 2752 match(Set dst (SqrtF (LoadF src))); 2753 2754 format %{ "sqrtss $dst, $src" %} 2755 ins_cost(150); 2756 ins_encode %{ 2757 __ sqrtss($dst$$XMMRegister, $src$$Address); 2758 %} 2759 ins_pipe(pipe_slow); 2760 %} 2761 2762 instruct sqrtF_imm(regF dst, immF con) %{ 2763 predicate(UseSSE>=1); 2764 match(Set dst (SqrtF con)); 2765 2766 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2767 ins_cost(150); 2768 ins_encode %{ 2769 __ sqrtss($dst$$XMMRegister,
$constantaddress($con)); 2770 %} 2771 ins_pipe(pipe_slow); 2772 %} 2773 2774 instruct sqrtD_reg(regD dst, regD src) %{ 2775 predicate(UseSSE>=2); 2776 match(Set dst (SqrtD src)); 2777 2778 format %{ "sqrtsd $dst, $src" %} 2779 ins_cost(150); 2780 ins_encode %{ 2781 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2782 %} 2783 ins_pipe(pipe_slow); 2784 %} 2785 2786 instruct sqrtD_mem(regD dst, memory src) %{ 2787 predicate(UseSSE>=2); 2788 match(Set dst (SqrtD (LoadD src))); 2789 2790 format %{ "sqrtsd $dst, $src" %} 2791 ins_cost(150); 2792 ins_encode %{ 2793 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2794 %} 2795 ins_pipe(pipe_slow); 2796 %} 2797 2798 instruct sqrtD_imm(regD dst, immD con) %{ 2799 predicate(UseSSE>=2); 2800 match(Set dst (SqrtD con)); 2801 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2802 ins_cost(150); 2803 ins_encode %{ 2804 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2805 %} 2806 ins_pipe(pipe_slow); 2807 %} 2808 2809 instruct onspinwait() %{ 2810 match(OnSpinWait); 2811 ins_cost(200); 2812 2813 format %{ 2814 $$template 2815 if (os::is_MP()) { 2816 $$emit$$"pause\t! membar_onspinwait" 2817 } else { 2818 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2819 } 2820 %} 2821 ins_encode %{ 2822 __ pause(); 2823 %} 2824 ins_pipe(pipe_slow); 2825 %} 2826 2827 // a * b + c 2828 instruct fmaD_reg(regD a, regD b, regD c) %{ 2829 predicate(UseFMA); 2830 match(Set c (FmaD c (Binary a b))); 2831 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2832 ins_cost(150); 2833 ins_encode %{ 2834 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2835 %} 2836 ins_pipe( pipe_slow ); 2837 %} 2838 2839 // a * b + c 2840 instruct fmaF_reg(regF a, regF b, regF c) %{ 2841 predicate(UseFMA); 2842 match(Set c (FmaF c (Binary a b))); 2843 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2844 ins_cost(150); 2845 ins_encode %{ 2846 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2847 %} 2848 ins_pipe( pipe_slow ); 2849 %} 2850 2851 // ====================VECTOR INSTRUCTIONS===================================== 2852 2853 instruct reinterpretS(vecS dst) %{ 2854 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2855 match(Set dst (VectorReinterpret dst)); 2856 ins_cost(125); 2857 format %{ " # reinterpret $dst" %} 2858 ins_encode %{ 2859 // empty 2860 %} 2861 ins_pipe( pipe_slow ); 2862 %} 2863 2864 instruct reinterpretS2D(vecD dst, vecS src, rRegL scratch) %{ 2865 predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2866 match(Set dst (VectorReinterpret src)); 2867 ins_cost(125); 2868 effect(TEMP dst, TEMP scratch); 2869 format %{ " # reinterpret $dst,$src" %} 2870 ins_encode %{ 2871 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); 2872 __ pand($dst$$XMMRegister, $src$$XMMRegister); 2873 %} 2874 ins_pipe( pipe_slow ); 2875 %} 2876 2877 instruct reinterpretS2D_avx(vecD dst, vecS src, rRegL scratch) %{ 2878 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2879 match(Set dst (VectorReinterpret src)); 2880 ins_cost(125); 2881 effect(TEMP dst, TEMP scratch); 2882 format %{ " # reinterpret $dst,$src" %} 2883 ins_encode %{ 2884 int vector_len = 0; 2885 __ vpand($dst$$XMMRegister, $src$$XMMRegister, 
ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register); 2886 %} 2887 ins_pipe( pipe_slow ); 2888 %} 2889 2890 instruct reinterpretS2X(vecX dst, vecS src, rRegL scratch) %{ 2891 predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2892 match(Set dst (VectorReinterpret src)); 2893 ins_cost(125); 2894 effect(TEMP dst, TEMP scratch); 2895 format %{ " # reinterpret $dst,$src" %} 2896 ins_encode %{ 2897 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); 2898 __ pand($dst$$XMMRegister, $src$$XMMRegister); 2899 %} 2900 ins_pipe( pipe_slow ); 2901 %} 2902 2903 instruct reinterpretS2X_avx(vecX dst, vecS src, rRegL scratch) %{ 2904 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2905 match(Set dst (VectorReinterpret src)); 2906 ins_cost(125); 2907 effect(TEMP scratch); 2908 format %{ " # reinterpret $dst,$src" %} 2909 ins_encode %{ 2910 int vector_len = 0; 2911 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register); 2912 %} 2913 ins_pipe( pipe_slow ); 2914 %} 2915 2916 instruct reinterpretS2Y(vecY dst, vecS src, rRegL scratch) %{ 2917 predicate(UseAVX >= 2 && n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2918 match(Set dst (VectorReinterpret src)); 2919 ins_cost(125); 2920 effect(TEMP scratch); 2921 format %{ " # reinterpret $dst,$src" %} 2922 ins_encode %{ 2923 int vector_len = 1; 2924 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register); 2925 %} 2926 ins_pipe( pipe_slow ); 2927 %} 2928 2929 instruct reinterpretS2Z(vecZ dst, vecS src, rRegL scratch) %{ 2930 predicate(UseAVX > 2 && n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2931 match(Set dst (VectorReinterpret src)); 2932 ins_cost(125); 2933 effect(TEMP scratch); 2934 format %{ " # reinterpret $dst,$src" %} 2935 ins_encode %{ 2936 int vector_len = 2; 2937 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register); 2938 %} 2939 ins_pipe( pipe_slow ); 2940 %} 2941 2942 instruct reinterpretD2S(vecS dst, vecD src) %{ 2943 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2944 match(Set dst (VectorReinterpret src)); 2945 ins_cost(125); 2946 format %{ " # reinterpret $dst,$src" %} 2947 ins_encode %{ 2948 // If register is the same, then move is not needed. 
2949 if ($dst$$XMMRegister != $src$$XMMRegister) { 2950 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2951 } 2952 %} 2953 ins_pipe( pipe_slow ); 2954 %} 2955 2956 instruct reinterpretD(vecD dst) %{ 2957 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2958 match(Set dst (VectorReinterpret dst)); 2959 ins_cost(125); 2960 format %{ " # reinterpret $dst" %} 2961 ins_encode %{ 2962 // empty 2963 %} 2964 ins_pipe( pipe_slow ); 2965 %} 2966 2967 instruct reinterpretD2X(vecX dst, vecD src, rRegL scratch) %{ 2968 predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2969 match(Set dst (VectorReinterpret src)); 2970 ins_cost(125); 2971 effect(TEMP dst, TEMP scratch); 2972 format %{ " # reinterpret $dst,$src" %} 2973 ins_encode %{ 2974 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); 2975 __ pand($dst$$XMMRegister, $src$$XMMRegister); 2976 %} 2977 ins_pipe( pipe_slow ); 2978 %} 2979 2980 instruct reinterpretD2X_avx(vecX dst, vecD src, rRegL scratch) %{ 2981 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2982 match(Set dst (VectorReinterpret src)); 2983 ins_cost(125); 2984 effect(TEMP dst, TEMP scratch); 2985 format %{ " # reinterpret $dst,$src" %} 2986 ins_encode %{ 2987 int vector_len = 0; 2988 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); 2989 %} 2990 ins_pipe( pipe_slow ); 2991 %} 2992 2993 instruct reinterpretD2Y(vecY dst, vecD src, rRegL scratch) %{ 2994 predicate(UseAVX >= 2 && n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2995 match(Set dst (VectorReinterpret src)); 2996 ins_cost(125); 2997 effect(TEMP scratch); 2998 format %{ " # reinterpret $dst,$src" %} 2999 ins_encode %{ 3000 int vector_len = 1; 3001 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); 3002 %} 3003 ins_pipe( pipe_slow ); 3004 %} 3005 3006 instruct reinterpretD2Z(vecZ dst, vecD src, rRegL scratch) %{ 3007 predicate(UseAVX > 2 && n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 3008 match(Set dst (VectorReinterpret src)); 3009 ins_cost(125); 3010 effect(TEMP scratch); 3011 format %{ " # reinterpret $dst,$src" %} 3012 ins_encode %{ 3013 int vector_len = 2; 3014 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); 3015 %} 3016 ins_pipe( pipe_slow ); 3017 %} 3018 3019 instruct reinterpretX2S(vecS dst, vecX src) %{ 3020 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3021 match(Set dst (VectorReinterpret src)); 3022 ins_cost(125); 3023 format %{ " # reinterpret $dst,$src" %} 3024 ins_encode %{ 3025 // If register is the same, then move is not needed. 
3026     if ($dst$$XMMRegister != $src$$XMMRegister) {
3027       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
3028     }
3029   %}
3030   ins_pipe( pipe_slow );
3031 %}
3032
3033 instruct reinterpretX2D(vecD dst, vecX src) %{
3034   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
3035   match(Set dst (VectorReinterpret src));
3036   ins_cost(125);
3037   format %{ " # reinterpret $dst,$src" %}
3038   ins_encode %{
3039     // If register is the same, then move is not needed.
3040     if ($dst$$XMMRegister != $src$$XMMRegister) {
3041       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
3042     }
3043   %}
3044   ins_pipe( pipe_slow );
3045 %}
3046
3047 instruct reinterpretX(vecX dst) %{
3048   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
3049   match(Set dst (VectorReinterpret dst));
3050   ins_cost(125);
3051   format %{ " # reinterpret $dst" %}
3052   ins_encode %{
3053     // empty
3054   %}
3055   ins_pipe( pipe_slow );
3056 %}
3057
3058 instruct reinterpretX2Y(vecY dst, vecX src) %{
3059   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
3060   match(Set dst (VectorReinterpret src));
3061   ins_cost(125);
3062   effect(TEMP dst);
3063   format %{ " # reinterpret $dst,$src" %}
3064   ins_encode %{
3065     int vector_len = 1;
3066     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3067     __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // only the low 128 bits need to be moved
3068   %}
3069   ins_pipe( pipe_slow );
3070 %}
3071
3072 instruct reinterpretX2Z(vecZ dst, vecX src) %{
3073   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
3074   match(Set dst (VectorReinterpret src));
3075   ins_cost(125);
3076   effect(TEMP dst);
3077   format %{ " # reinterpret $dst,$src" %}
3078   ins_encode %{
3079     int vector_len = 2;
3080     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3081     __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // only the low 128 bits need to be moved
3082   %}
3083   ins_pipe( pipe_slow );
3084 %}
3085
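// Note on the widening forms above (reinterpretX2Y, reinterpretX2Z): the
// destination is first cleared with vpxor and only the low 128 bits are then
// copied, so the newly exposed upper lanes read as zero rather than stale
// register contents (reinterpretY2Z below follows the same pattern). The
// narrowing and same-size forms need no masking: the lanes that stay visible
// already sit in the low bits, so at most a register-to-register move is
// emitted.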
3086 instruct reinterpretY2S(vecS dst, vecY src) %{
3087   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
3088   match(Set dst (VectorReinterpret src));
3089   ins_cost(125);
3090   format %{ " # reinterpret $dst,$src" %}
3091   ins_encode %{
3092     // If register is the same, then move is not needed.
3093     if ($dst$$XMMRegister != $src$$XMMRegister) {
3094       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
3095     }
3096   %}
3097   ins_pipe( pipe_slow );
3098 %}
3099
3100 instruct reinterpretY2D(vecD dst, vecY src) %{
3101   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
3102   match(Set dst (VectorReinterpret src));
3103   ins_cost(125);
3104   format %{ " # reinterpret $dst,$src" %}
3105   ins_encode %{
3106     // If register is the same, then move is not needed.
3107     if ($dst$$XMMRegister != $src$$XMMRegister) {
3108       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
3109     }
3110   %}
3111   ins_pipe( pipe_slow );
3112 %}
3113
3114 instruct reinterpretY2X(vecX dst, vecY src) %{
3115   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
3116   match(Set dst (VectorReinterpret src));
3117   ins_cost(125);
3118   format %{ " # reinterpret $dst,$src" %}
3119   ins_encode %{
3120     // If register is the same, then move is not needed.
3121     if ($dst$$XMMRegister != $src$$XMMRegister) {
3122       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
3123     }
3124   %}
3125   ins_pipe( pipe_slow );
3126 %}
3127
3128 instruct reinterpretY(vecY dst) %{
3129   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
3130   match(Set dst (VectorReinterpret dst));
3131   ins_cost(125);
3132   format %{ " # reinterpret $dst" %}
3133   ins_encode %{
3134     // empty
3135   %}
3136   ins_pipe( pipe_slow );
3137 %}
3138
3139 instruct reinterpretY2Z(vecZ dst, vecY src) %{
3140   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
3141   match(Set dst (VectorReinterpret src));
3142   ins_cost(125);
3143   effect(TEMP dst);
3144   format %{ " # reinterpret $dst,$src" %}
3145   ins_encode %{
3146     int vector_len = 2;
3147     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
3148     __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
3149   %}
3150   ins_pipe( pipe_slow );
3151 %}
3152
3153 instruct reinterpretZ2S(vecS dst, vecZ src) %{
3154   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64);
3155   match(Set dst (VectorReinterpret src));
3156   ins_cost(125);
3157   format %{ " # reinterpret $dst,$src" %}
3158   ins_encode %{
3159     // If register is the same, then move is not needed.
3160     if ($dst$$XMMRegister != $src$$XMMRegister) {
3161       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
3162     }
3163   %}
3164   ins_pipe( pipe_slow );
3165 %}
3166
3167 instruct reinterpretZ2D(vecD dst, vecZ src) %{
3168   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64);
3169   match(Set dst (VectorReinterpret src));
3170   ins_cost(125);
3171   format %{ " # reinterpret $dst,$src" %}
3172   ins_encode %{
3173     // If register is the same, then move is not needed.
3174     if ($dst$$XMMRegister != $src$$XMMRegister) {
3175       __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
3176     }
3177   %}
3178   ins_pipe( pipe_slow );
3179 %}
3180
3181 instruct reinterpretZ2X(vecX dst, vecZ src) %{
3182   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64);
3183   match(Set dst (VectorReinterpret src));
3184   ins_cost(125);
3185   format %{ " # reinterpret $dst,$src" %}
3186   ins_encode %{
3187     // If register is the same, then move is not needed.
3188 if ($dst$$XMMRegister != $src$$XMMRegister) { 3189 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3190 } 3191 %} 3192 ins_pipe( pipe_slow ); 3193 %} 3194 3195 instruct reinterpretZ2Y(vecY dst, vecZ src) %{ 3196 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3197 match(Set dst (VectorReinterpret src)); 3198 ins_cost(125); 3199 format %{ " # reinterpret $dst,$src" %} 3200 ins_encode %{ 3201 // If register is the same, then move is not needed. 3202 if ($dst$$XMMRegister != $src$$XMMRegister) { 3203 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3204 } 3205 %} 3206 ins_pipe( pipe_slow ); 3207 %} 3208 3209 instruct reinterpretZ(vecZ dst) %{ 3210 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3211 match(Set dst (VectorReinterpret dst)); 3212 ins_cost(125); 3213 format %{ " # reinterpret $dst" %} 3214 ins_encode %{ 3215 // empty 3216 %} 3217 ins_pipe( pipe_slow ); 3218 %} 3219 3220 // ========== 3221 3222 // Load vectors (1 byte long) 3223 instruct loadV1(vecS dst, memory mem, rRegI tmp) %{ 3224 predicate(n->as_LoadVector()->memory_size() == 1); 3225 match(Set dst (LoadVector mem)); 3226 ins_cost(125); 3227 effect(TEMP tmp); 3228 format %{ "movzbl $tmp,$mem\n\t" 3229 "movd $dst,$tmp\t! load vector (1 byte)" %} 3230 ins_encode %{ 3231 __ movzbl($tmp$$Register, $mem$$Address); 3232 __ movdl($dst$$XMMRegister, $tmp$$Register); 3233 %} 3234 ins_pipe( pipe_slow ); 3235 %} 3236 3237 // Load vectors (2 bytes long) 3238 instruct loadV2(vecS dst, memory mem, rRegI tmp) %{ 3239 predicate(n->as_LoadVector()->memory_size() == 2); 3240 match(Set dst (LoadVector mem)); 3241 ins_cost(125); 3242 effect(TEMP tmp); 3243 format %{ "movzwl $tmp,$mem\n\t" 3244 "movd $dst,$tmp\t! load vector (2 bytes)" %} 3245 ins_encode %{ 3246 __ movzwl($tmp$$Register, $mem$$Address); 3247 __ movdl($dst$$XMMRegister, $tmp$$Register); 3248 %} 3249 ins_pipe( pipe_slow ); 3250 %} 3251 3252 // Load vectors (4 bytes long) 3253 instruct loadV4(vecS dst, memory mem) %{ 3254 predicate(n->as_LoadVector()->memory_size() == 4); 3255 match(Set dst (LoadVector mem)); 3256 ins_cost(125); 3257 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 3258 ins_encode %{ 3259 __ movdl($dst$$XMMRegister, $mem$$Address); 3260 %} 3261 ins_pipe( pipe_slow ); 3262 %} 3263 3264 // Load vectors (8 bytes long) 3265 instruct loadV8(vecD dst, memory mem) %{ 3266 predicate(n->as_LoadVector()->memory_size() == 8); 3267 match(Set dst (LoadVector mem)); 3268 ins_cost(125); 3269 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 3270 ins_encode %{ 3271 __ movq($dst$$XMMRegister, $mem$$Address); 3272 %} 3273 ins_pipe( pipe_slow ); 3274 %} 3275 3276 // Load vectors (16 bytes long) 3277 instruct loadV16(vecX dst, memory mem) %{ 3278 predicate(n->as_LoadVector()->memory_size() == 16); 3279 match(Set dst (LoadVector mem)); 3280 ins_cost(125); 3281 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 3282 ins_encode %{ 3283 __ movdqu($dst$$XMMRegister, $mem$$Address); 3284 %} 3285 ins_pipe( pipe_slow ); 3286 %} 3287 3288 // Load vectors (32 bytes long) 3289 instruct loadV32(vecY dst, memory mem) %{ 3290 predicate(n->as_LoadVector()->memory_size() == 32); 3291 match(Set dst (LoadVector mem)); 3292 ins_cost(125); 3293 format %{ "vmovdqu $dst,$mem\t! 
load vector (32 bytes)" %} 3294 ins_encode %{ 3295 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3296 %} 3297 ins_pipe( pipe_slow ); 3298 %} 3299 3300 // Load vectors (64 bytes long) 3301 instruct loadV64_dword(vecZ dst, memory mem) %{ 3302 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 3303 match(Set dst (LoadVector mem)); 3304 ins_cost(125); 3305 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 3306 ins_encode %{ 3307 int vector_len = 2; 3308 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3309 %} 3310 ins_pipe( pipe_slow ); 3311 %} 3312 3313 // Load vectors (64 bytes long) 3314 instruct loadV64_qword(vecZ dst, memory mem) %{ 3315 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 3316 match(Set dst (LoadVector mem)); 3317 ins_cost(125); 3318 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 3319 ins_encode %{ 3320 int vector_len = 2; 3321 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 3322 %} 3323 ins_pipe( pipe_slow ); 3324 %} 3325 3326 // Store vectors 3327 instruct storeV1(memory mem, vecS src, rRegI tmp) %{ 3328 predicate(n->as_StoreVector()->memory_size() == 1); 3329 match(Set mem (StoreVector mem src)); 3330 ins_cost(145); 3331 effect(TEMP tmp); 3332 format %{ "movd $tmp,$src\n\t" 3333 "movb $mem,$tmp\t! store vector (1 byte)" %} 3334 ins_encode %{ 3335 __ movdl($tmp$$Register, $src$$XMMRegister); 3336 __ movb($mem$$Address, $tmp$$Register); 3337 %} 3338 ins_pipe( pipe_slow ); 3339 %} 3340 3341 instruct storeV2(memory mem, vecS src, rRegI tmp) %{ 3342 predicate(n->as_StoreVector()->memory_size() == 2); 3343 match(Set mem (StoreVector mem src)); 3344 ins_cost(145); 3345 effect(TEMP tmp); 3346 format %{ "movd $tmp,$src\n\t" 3347 "movw $mem,$tmp\t! store vector (2 bytes)" %} 3348 ins_encode %{ 3349 __ movdl($tmp$$Register, $src$$XMMRegister); 3350 __ movw($mem$$Address, $tmp$$Register); 3351 %} 3352 ins_pipe( pipe_slow ); 3353 %} 3354 3355 instruct storeV4(memory mem, vecS src) %{ 3356 predicate(n->as_StoreVector()->memory_size() == 4); 3357 match(Set mem (StoreVector mem src)); 3358 ins_cost(145); 3359 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 3360 ins_encode %{ 3361 __ movdl($mem$$Address, $src$$XMMRegister); 3362 %} 3363 ins_pipe( pipe_slow ); 3364 %} 3365 3366 instruct storeV8(memory mem, vecD src) %{ 3367 predicate(n->as_StoreVector()->memory_size() == 8); 3368 match(Set mem (StoreVector mem src)); 3369 ins_cost(145); 3370 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 3371 ins_encode %{ 3372 __ movq($mem$$Address, $src$$XMMRegister); 3373 %} 3374 ins_pipe( pipe_slow ); 3375 %} 3376 3377 instruct storeV16(memory mem, vecX src) %{ 3378 predicate(n->as_StoreVector()->memory_size() == 16); 3379 match(Set mem (StoreVector mem src)); 3380 ins_cost(145); 3381 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3382 ins_encode %{ 3383 __ movdqu($mem$$Address, $src$$XMMRegister); 3384 %} 3385 ins_pipe( pipe_slow ); 3386 %} 3387 3388 instruct storeV32(memory mem, vecY src) %{ 3389 predicate(n->as_StoreVector()->memory_size() == 32); 3390 match(Set mem (StoreVector mem src)); 3391 ins_cost(145); 3392 format %{ "vmovdqu $mem,$src\t! 
store vector (32 bytes)" %} 3393 ins_encode %{ 3394 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3395 %} 3396 ins_pipe( pipe_slow ); 3397 %} 3398 3399 instruct storeV64_dword(memory mem, vecZ src) %{ 3400 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 3401 match(Set mem (StoreVector mem src)); 3402 ins_cost(145); 3403 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 3404 ins_encode %{ 3405 int vector_len = 2; 3406 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3407 %} 3408 ins_pipe( pipe_slow ); 3409 %} 3410 3411 instruct storeV64_qword(memory mem, vecZ src) %{ 3412 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 3413 match(Set mem (StoreVector mem src)); 3414 ins_cost(145); 3415 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 3416 ins_encode %{ 3417 int vector_len = 2; 3418 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 3419 %} 3420 ins_pipe( pipe_slow ); 3421 %} 3422 3423 // ====================LEGACY REPLICATE======================================= 3424 3425 instruct Repl4B_mem(vecS dst, memory mem) %{ 3426 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3427 match(Set dst (ReplicateB (LoadB mem))); 3428 format %{ "punpcklbw $dst,$mem\n\t" 3429 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3430 ins_encode %{ 3431 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3432 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3433 %} 3434 ins_pipe( pipe_slow ); 3435 %} 3436 3437 instruct Repl8B_mem(vecD dst, memory mem) %{ 3438 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3439 match(Set dst (ReplicateB (LoadB mem))); 3440 format %{ "punpcklbw $dst,$mem\n\t" 3441 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3442 ins_encode %{ 3443 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3444 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3445 %} 3446 ins_pipe( pipe_slow ); 3447 %} 3448 3449 instruct Repl16B(vecX dst, rRegI src) %{ 3450 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3451 match(Set dst (ReplicateB src)); 3452 format %{ "movd $dst,$src\n\t" 3453 "punpcklbw $dst,$dst\n\t" 3454 "pshuflw $dst,$dst,0x00\n\t" 3455 "punpcklqdq $dst,$dst\t! replicate16B" %} 3456 ins_encode %{ 3457 __ movdl($dst$$XMMRegister, $src$$Register); 3458 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3459 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3460 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3461 %} 3462 ins_pipe( pipe_slow ); 3463 %} 3464 3465 instruct Repl16B_mem(vecX dst, memory mem) %{ 3466 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3467 match(Set dst (ReplicateB (LoadB mem))); 3468 format %{ "punpcklbw $dst,$mem\n\t" 3469 "pshuflw $dst,$dst,0x00\n\t" 3470 "punpcklqdq $dst,$dst\t! 
replicate16B" %}
3471   ins_encode %{
3472     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3473     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3474     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3475   %}
3476   ins_pipe( pipe_slow );
3477 %}
3478
3479 instruct Repl32B(vecY dst, rRegI src) %{
3480   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
3481   match(Set dst (ReplicateB src));
3482   format %{ "movd $dst,$src\n\t"
3483             "punpcklbw $dst,$dst\n\t"
3484             "pshuflw $dst,$dst,0x00\n\t"
3485             "punpcklqdq $dst,$dst\n\t"
3486             "vinserti128_high $dst,$dst\t! replicate32B" %}
3487   ins_encode %{
3488     __ movdl($dst$$XMMRegister, $src$$Register);
3489     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3490     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3491     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3492     __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3493   %}
3494   ins_pipe( pipe_slow );
3495 %}
3496
3497 instruct Repl32B_mem(vecY dst, memory mem) %{
3498   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
3499   match(Set dst (ReplicateB (LoadB mem)));
3500   format %{ "punpcklbw $dst,$mem\n\t"
3501             "pshuflw $dst,$dst,0x00\n\t"
3502             "punpcklqdq $dst,$dst\n\t"
3503             "vinserti128_high $dst,$dst\t! replicate32B" %}
3504   ins_encode %{
3505     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
3506     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3507     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3508     __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3509   %}
3510   ins_pipe( pipe_slow );
3511 %}
3512
3513 instruct Repl16B_imm(vecX dst, immI con) %{
3514   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3515   match(Set dst (ReplicateB con));
3516   format %{ "movq $dst,[$constantaddress]\n\t"
3517             "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
3518   ins_encode %{
3519     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3520     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3521   %}
3522   ins_pipe( pipe_slow );
3523 %}
3524
3525 instruct Repl32B_imm(vecY dst, immI con) %{
3526   predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
3527   match(Set dst (ReplicateB con));
3528   format %{ "movq $dst,[$constantaddress]\n\t"
3529             "punpcklqdq $dst,$dst\n\t"
3530             "vinserti128_high $dst,$dst\t! replicate32B($con)" %}
3531   ins_encode %{
3532     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
3533     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3534     __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3535   %}
3536   ins_pipe( pipe_slow );
3537 %}
3538
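// Note on the *_imm forms above (illustrative; replicate8_imm is assumed to
// be the constant-folding helper defined earlier in this file): the scalar
// immediate is repeated at its element width to build one 64-bit constant,
// so a single constant-table load seeds the vector. Sketch for a byte
// element:
//
//   replicate8_imm(0x41, 1)  ==> 0x4141414141414141
//   movq       xmm, [constant]   // eight copies in the low 64 bits
//   punpcklqdq xmm, xmm          // duplicate into the high half -> 16 copies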
3539 instruct Repl4S(vecD dst, rRegI src) %{
3540   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
3541   match(Set dst (ReplicateS src));
3542   format %{ "movd $dst,$src\n\t"
3543             "pshuflw $dst,$dst,0x00\t! replicate4S" %}
3544   ins_encode %{
3545     __ movdl($dst$$XMMRegister, $src$$Register);
3546     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3547   %}
3548   ins_pipe( pipe_slow );
3549 %}
3550
3551 instruct Repl4S_mem(vecD dst, memory mem) %{
3552   predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3553   match(Set dst (ReplicateS (LoadS mem)));
3554   format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
3555   ins_encode %{
3556     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3557   %}
3558   ins_pipe( pipe_slow );
3559 %}
3560
3561 instruct Repl8S(vecX dst, rRegI src) %{
3562   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
3563   match(Set dst (ReplicateS src));
3564   format %{ "movd $dst,$src\n\t"
3565             "pshuflw $dst,$dst,0x00\n\t"
3566             "punpcklqdq $dst,$dst\t! replicate8S" %}
3567   ins_encode %{
3568     __ movdl($dst$$XMMRegister, $src$$Register);
3569     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3570     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3571   %}
3572   ins_pipe( pipe_slow );
3573 %}
3574
3575 instruct Repl8S_mem(vecX dst, memory mem) %{
3576   predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
3577   match(Set dst (ReplicateS (LoadS mem)));
3578   format %{ "pshuflw $dst,$mem,0x00\n\t"
3579             "punpcklqdq $dst,$dst\t! replicate8S" %}
3580   ins_encode %{
3581     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3582     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3583   %}
3584   ins_pipe( pipe_slow );
3585 %}
3586
3587 instruct Repl8S_imm(vecX dst, immI con) %{
3588   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
3589   match(Set dst (ReplicateS con));
3590   format %{ "movq $dst,[$constantaddress]\n\t"
3591             "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
3592   ins_encode %{
3593     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
3594     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3595   %}
3596   ins_pipe( pipe_slow );
3597 %}
3598
3599 instruct Repl16S(vecY dst, rRegI src) %{
3600   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3601   match(Set dst (ReplicateS src));
3602   format %{ "movd $dst,$src\n\t"
3603             "pshuflw $dst,$dst,0x00\n\t"
3604             "punpcklqdq $dst,$dst\n\t"
3605             "vinserti128_high $dst,$dst\t! replicate16S" %}
3606   ins_encode %{
3607     __ movdl($dst$$XMMRegister, $src$$Register);
3608     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
3609     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3610     __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3611   %}
3612   ins_pipe( pipe_slow );
3613 %}
3614
3615 instruct Repl16S_mem(vecY dst, memory mem) %{
3616   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3617   match(Set dst (ReplicateS (LoadS mem)));
3618   format %{ "pshuflw $dst,$mem,0x00\n\t"
3619             "punpcklqdq $dst,$dst\n\t"
3620             "vinserti128_high $dst,$dst\t! replicate16S" %}
3621   ins_encode %{
3622     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
3623     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
3624     __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
3625   %}
3626   ins_pipe( pipe_slow );
3627 %}
3628
3629 instruct Repl16S_imm(vecY dst, immI con) %{
3630   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
3631   match(Set dst (ReplicateS con));
3632   format %{ "movq $dst,[$constantaddress]\n\t"
3633             "punpcklqdq $dst,$dst\n\t"
3634             "vinserti128_high $dst,$dst\t!
replicate16S($con)" %} 3635 ins_encode %{ 3636 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3637 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3638 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3639 %} 3640 ins_pipe( pipe_slow ); 3641 %} 3642 3643 instruct Repl4I(vecX dst, rRegI src) %{ 3644 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3645 match(Set dst (ReplicateI src)); 3646 format %{ "movd $dst,$src\n\t" 3647 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3648 ins_encode %{ 3649 __ movdl($dst$$XMMRegister, $src$$Register); 3650 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3651 %} 3652 ins_pipe( pipe_slow ); 3653 %} 3654 3655 instruct Repl4I_mem(vecX dst, memory mem) %{ 3656 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3657 match(Set dst (ReplicateI (LoadI mem))); 3658 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3659 ins_encode %{ 3660 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3661 %} 3662 ins_pipe( pipe_slow ); 3663 %} 3664 3665 instruct Repl8I(vecY dst, rRegI src) %{ 3666 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3667 match(Set dst (ReplicateI src)); 3668 format %{ "movd $dst,$src\n\t" 3669 "pshufd $dst,$dst,0x00\n\t" 3670 "vinserti128_high $dst,$dst\t! replicate8I" %} 3671 ins_encode %{ 3672 __ movdl($dst$$XMMRegister, $src$$Register); 3673 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3674 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3675 %} 3676 ins_pipe( pipe_slow ); 3677 %} 3678 3679 instruct Repl8I_mem(vecY dst, memory mem) %{ 3680 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3681 match(Set dst (ReplicateI (LoadI mem))); 3682 format %{ "pshufd $dst,$mem,0x00\n\t" 3683 "vinserti128_high $dst,$dst\t! replicate8I" %} 3684 ins_encode %{ 3685 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3686 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3687 %} 3688 ins_pipe( pipe_slow ); 3689 %} 3690 3691 instruct Repl4I_imm(vecX dst, immI con) %{ 3692 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3693 match(Set dst (ReplicateI con)); 3694 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3695 "punpcklqdq $dst,$dst" %} 3696 ins_encode %{ 3697 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3698 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3699 %} 3700 ins_pipe( pipe_slow ); 3701 %} 3702 3703 instruct Repl8I_imm(vecY dst, immI con) %{ 3704 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3705 match(Set dst (ReplicateI con)); 3706 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3707 "punpcklqdq $dst,$dst\n\t" 3708 "vinserti128_high $dst,$dst" %} 3709 ins_encode %{ 3710 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3711 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3712 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3713 %} 3714 ins_pipe( pipe_slow ); 3715 %} 3716 3717 // Long could be loaded into xmm register directly from memory. 3718 instruct Repl2L_mem(vecX dst, memory mem) %{ 3719 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3720 match(Set dst (ReplicateL (LoadL mem))); 3721 format %{ "movq $dst,$mem\n\t" 3722 "punpcklqdq $dst,$dst\t! 
replicate2L" %} 3723 ins_encode %{ 3724 __ movq($dst$$XMMRegister, $mem$$Address); 3725 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3726 %} 3727 ins_pipe( pipe_slow ); 3728 %} 3729 3730 // Replicate long (8 byte) scalar to be vector 3731 #ifdef _LP64 3732 instruct Repl4L(vecY dst, rRegL src) %{ 3733 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3734 match(Set dst (ReplicateL src)); 3735 format %{ "movdq $dst,$src\n\t" 3736 "punpcklqdq $dst,$dst\n\t" 3737 "vinserti128_high $dst,$dst\t! replicate4L" %} 3738 ins_encode %{ 3739 __ movdq($dst$$XMMRegister, $src$$Register); 3740 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3741 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3742 %} 3743 ins_pipe( pipe_slow ); 3744 %} 3745 #else // _LP64 3746 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3747 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3748 match(Set dst (ReplicateL src)); 3749 effect(TEMP dst, USE src, TEMP tmp); 3750 format %{ "movdl $dst,$src.lo\n\t" 3751 "movdl $tmp,$src.hi\n\t" 3752 "punpckldq $dst,$tmp\n\t" 3753 "punpcklqdq $dst,$dst\n\t" 3754 "vinserti128_high $dst,$dst\t! replicate4L" %} 3755 ins_encode %{ 3756 __ movdl($dst$$XMMRegister, $src$$Register); 3757 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3758 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3759 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3760 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 #endif // _LP64 3765 3766 instruct Repl4L_imm(vecY dst, immL con) %{ 3767 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3768 match(Set dst (ReplicateL con)); 3769 format %{ "movq $dst,[$constantaddress]\n\t" 3770 "punpcklqdq $dst,$dst\n\t" 3771 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3772 ins_encode %{ 3773 __ movq($dst$$XMMRegister, $constantaddress($con)); 3774 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3775 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3776 %} 3777 ins_pipe( pipe_slow ); 3778 %} 3779 3780 instruct Repl4L_mem(vecY dst, memory mem) %{ 3781 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3782 match(Set dst (ReplicateL (LoadL mem))); 3783 format %{ "movq $dst,$mem\n\t" 3784 "punpcklqdq $dst,$dst\n\t" 3785 "vinserti128_high $dst,$dst\t! replicate4L" %} 3786 ins_encode %{ 3787 __ movq($dst$$XMMRegister, $mem$$Address); 3788 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3789 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3790 %} 3791 ins_pipe( pipe_slow ); 3792 %} 3793 3794 instruct Repl2F_mem(vecD dst, memory mem) %{ 3795 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3796 match(Set dst (ReplicateF (LoadF mem))); 3797 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3798 ins_encode %{ 3799 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3800 %} 3801 ins_pipe( pipe_slow ); 3802 %} 3803 3804 instruct Repl4F_mem(vecX dst, memory mem) %{ 3805 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3806 match(Set dst (ReplicateF (LoadF mem))); 3807 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4F" %} 3808 ins_encode %{ 3809 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3810 %} 3811 ins_pipe( pipe_slow ); 3812 %} 3813 3814 instruct Repl8F(vecY dst, regF src) %{ 3815 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3816 match(Set dst (ReplicateF src)); 3817 format %{ "pshufd $dst,$src,0x00\n\t" 3818 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3819 ins_encode %{ 3820 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3821 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3822 %} 3823 ins_pipe( pipe_slow ); 3824 %} 3825 3826 instruct Repl8F_mem(vecY dst, memory mem) %{ 3827 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3828 match(Set dst (ReplicateF (LoadF mem))); 3829 format %{ "pshufd $dst,$mem,0x00\n\t" 3830 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3831 ins_encode %{ 3832 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3833 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3834 %} 3835 ins_pipe( pipe_slow ); 3836 %} 3837 3838 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3839 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3840 match(Set dst (ReplicateF zero)); 3841 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3842 ins_encode %{ 3843 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3844 %} 3845 ins_pipe( fpu_reg_reg ); 3846 %} 3847 3848 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3849 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3850 match(Set dst (ReplicateF zero)); 3851 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3852 ins_encode %{ 3853 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3854 %} 3855 ins_pipe( fpu_reg_reg ); 3856 %} 3857 3858 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3859 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3860 match(Set dst (ReplicateF zero)); 3861 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3862 ins_encode %{ 3863 int vector_len = 1; 3864 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3865 %} 3866 ins_pipe( fpu_reg_reg ); 3867 %} 3868 3869 instruct Repl2D_mem(vecX dst, memory mem) %{ 3870 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3871 match(Set dst (ReplicateD (LoadD mem))); 3872 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3873 ins_encode %{ 3874 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3875 %} 3876 ins_pipe( pipe_slow ); 3877 %} 3878 3879 instruct Repl4D(vecY dst, regD src) %{ 3880 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3881 match(Set dst (ReplicateD src)); 3882 format %{ "pshufd $dst,$src,0x44\n\t" 3883 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3884 ins_encode %{ 3885 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3886 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3887 %} 3888 ins_pipe( pipe_slow ); 3889 %} 3890 3891 instruct Repl4D_mem(vecY dst, memory mem) %{ 3892 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3893 match(Set dst (ReplicateD (LoadD mem))); 3894 format %{ "pshufd $dst,$mem,0x44\n\t" 3895 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3896 ins_encode %{ 3897 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3898 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3899 %} 3900 ins_pipe( pipe_slow ); 3901 %} 3902 3903 // Replicate double (8 byte) scalar zero to be vector 3904 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3905 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3906 match(Set dst (ReplicateD zero)); 3907 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3908 ins_encode %{ 3909 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3910 %} 3911 ins_pipe( fpu_reg_reg ); 3912 %} 3913 3914 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3915 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3916 match(Set dst (ReplicateD zero)); 3917 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3918 ins_encode %{ 3919 int vector_len = 1; 3920 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3921 %} 3922 ins_pipe( fpu_reg_reg ); 3923 %} 3924 3925 // ====================GENERIC REPLICATE========================================== 3926 3927 // Replicate byte scalar to be vector 3928 instruct Repl4B(vecS dst, rRegI src) %{ 3929 predicate(n->as_Vector()->length() == 4); 3930 match(Set dst (ReplicateB src)); 3931 format %{ "movd $dst,$src\n\t" 3932 "punpcklbw $dst,$dst\n\t" 3933 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3934 ins_encode %{ 3935 __ movdl($dst$$XMMRegister, $src$$Register); 3936 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3937 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3938 %} 3939 ins_pipe( pipe_slow ); 3940 %} 3941 3942 instruct Repl8B(vecD dst, rRegI src) %{ 3943 predicate(n->as_Vector()->length() == 8); 3944 match(Set dst (ReplicateB src)); 3945 format %{ "movd $dst,$src\n\t" 3946 "punpcklbw $dst,$dst\n\t" 3947 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3948 ins_encode %{ 3949 __ movdl($dst$$XMMRegister, $src$$Register); 3950 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3951 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3952 %} 3953 ins_pipe( pipe_slow ); 3954 %} 3955 3956 // Replicate byte scalar immediate to be vector by loading from const table. 3957 instruct Repl4B_imm(vecS dst, immI con) %{ 3958 predicate(n->as_Vector()->length() == 4); 3959 match(Set dst (ReplicateB con)); 3960 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3961 ins_encode %{ 3962 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3963 %} 3964 ins_pipe( pipe_slow ); 3965 %} 3966 3967 instruct Repl8B_imm(vecD dst, immI con) %{ 3968 predicate(n->as_Vector()->length() == 8); 3969 match(Set dst (ReplicateB con)); 3970 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3971 ins_encode %{ 3972 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3973 %} 3974 ins_pipe( pipe_slow ); 3975 %} 3976 3977 // Replicate byte scalar zero to be vector 3978 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3979 predicate(n->as_Vector()->length() == 4); 3980 match(Set dst (ReplicateB zero)); 3981 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3982 ins_encode %{ 3983 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3984 %} 3985 ins_pipe( fpu_reg_reg ); 3986 %} 3987 3988 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3989 predicate(n->as_Vector()->length() == 8); 3990 match(Set dst (ReplicateB zero)); 3991 format %{ "pxor $dst,$dst\t! 
replicate8B zero" %}
3992   ins_encode %{
3993     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3994   %}
3995   ins_pipe( fpu_reg_reg );
3996 %}
3997
3998 instruct Repl16B_zero(vecX dst, immI0 zero) %{
3999   predicate(n->as_Vector()->length() == 16);
4000   match(Set dst (ReplicateB zero));
4001   format %{ "pxor $dst,$dst\t! replicate16B zero" %}
4002   ins_encode %{
4003     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4004   %}
4005   ins_pipe( fpu_reg_reg );
4006 %}
4007
4008 instruct Repl32B_zero(vecY dst, immI0 zero) %{
4009   predicate(n->as_Vector()->length() == 32);
4010   match(Set dst (ReplicateB zero));
4011   format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
4012   ins_encode %{
4013     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
4014     int vector_len = 1;
4015     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4016   %}
4017   ins_pipe( fpu_reg_reg );
4018 %}
4019
4020 // Replicate char/short (2 byte) scalar to be vector
4021 instruct Repl2S(vecS dst, rRegI src) %{
4022   predicate(n->as_Vector()->length() == 2);
4023   match(Set dst (ReplicateS src));
4024   format %{ "movd $dst,$src\n\t"
4025             "pshuflw $dst,$dst,0x00\t! replicate2S" %}
4026   ins_encode %{
4027     __ movdl($dst$$XMMRegister, $src$$Register);
4028     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4029   %}
4030   ins_pipe( fpu_reg_reg );
4031 %}
4032
4033 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
4034 instruct Repl2S_imm(vecS dst, immI con) %{
4035   predicate(n->as_Vector()->length() == 2);
4036   match(Set dst (ReplicateS con));
4037   format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
4038   ins_encode %{
4039     __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
4040   %}
4041   ins_pipe( fpu_reg_reg );
4042 %}
4043
4044 instruct Repl4S_imm(vecD dst, immI con) %{
4045   predicate(n->as_Vector()->length() == 4);
4046   match(Set dst (ReplicateS con));
4047   format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
4048   ins_encode %{
4049     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
4050   %}
4051   ins_pipe( fpu_reg_reg );
4052 %}
4053
4054 // Replicate char/short (2 byte) scalar zero to be vector
4055 instruct Repl2S_zero(vecS dst, immI0 zero) %{
4056   predicate(n->as_Vector()->length() == 2);
4057   match(Set dst (ReplicateS zero));
4058   format %{ "pxor $dst,$dst\t! replicate2S zero" %}
4059   ins_encode %{
4060     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4061   %}
4062   ins_pipe( fpu_reg_reg );
4063 %}
4064
4065 instruct Repl4S_zero(vecD dst, immI0 zero) %{
4066   predicate(n->as_Vector()->length() == 4);
4067   match(Set dst (ReplicateS zero));
4068   format %{ "pxor $dst,$dst\t! replicate4S zero" %}
4069   ins_encode %{
4070     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4071   %}
4072   ins_pipe( fpu_reg_reg );
4073 %}
4074
4075 instruct Repl8S_zero(vecX dst, immI0 zero) %{
4076   predicate(n->as_Vector()->length() == 8);
4077   match(Set dst (ReplicateS zero));
4078   format %{ "pxor $dst,$dst\t! replicate8S zero" %}
4079   ins_encode %{
4080     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4081   %}
4082   ins_pipe( fpu_reg_reg );
4083 %}
4084
4085 instruct Repl16S_zero(vecY dst, immI0 zero) %{
4086   predicate(n->as_Vector()->length() == 16);
4087   match(Set dst (ReplicateS zero));
4088   format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
4089   ins_encode %{
4090     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
4091     int vector_len = 1;
4092     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4093   %}
4094   ins_pipe( fpu_reg_reg );
4095 %}
4096
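// Example (illustrative; assumes the usual SuperWord auto-vectorization
// path): the *_zero rules above typically originate from a vectorized array
// fill, e.g.
//
//   for (int i = 0; i < a.length; i++) a[i] = 0;   // Java, a is short[]
//
// The fill becomes StoreVector(ReplicateS zero), and the zero vector is
// materialized by xor-ing a register with itself (pxor/vpxor) instead of
// loading it from the constant table.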
4097 // Replicate integer (4 byte) scalar to be vector
4098 instruct Repl2I(vecD dst, rRegI src) %{
4099   predicate(n->as_Vector()->length() == 2);
4100   match(Set dst (ReplicateI src));
4101   format %{ "movd $dst,$src\n\t"
4102             "pshufd $dst,$dst,0x00\t! replicate2I" %}
4103   ins_encode %{
4104     __ movdl($dst$$XMMRegister, $src$$Register);
4105     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4106   %}
4107   ins_pipe( fpu_reg_reg );
4108 %}
4109
4110 // Integer could be loaded into xmm register directly from memory.
4111 instruct Repl2I_mem(vecD dst, memory mem) %{
4112   predicate(n->as_Vector()->length() == 2);
4113   match(Set dst (ReplicateI (LoadI mem)));
4114   format %{ "movd $dst,$mem\n\t"
4115             "pshufd $dst,$dst,0x00\t! replicate2I" %}
4116   ins_encode %{
4117     __ movdl($dst$$XMMRegister, $mem$$Address);
4118     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4119   %}
4120   ins_pipe( fpu_reg_reg );
4121 %}
4122
4123 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
4124 instruct Repl2I_imm(vecD dst, immI con) %{
4125   predicate(n->as_Vector()->length() == 2);
4126   match(Set dst (ReplicateI con));
4127   format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
4128   ins_encode %{
4129     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
4130   %}
4131   ins_pipe( fpu_reg_reg );
4132 %}
4133
4134 // Replicate integer (4 byte) scalar zero to be vector
4135 instruct Repl2I_zero(vecD dst, immI0 zero) %{
4136   predicate(n->as_Vector()->length() == 2);
4137   match(Set dst (ReplicateI zero));
4138   format %{ "pxor $dst,$dst\t! replicate2I zero" %}
4139   ins_encode %{
4140     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4141   %}
4142   ins_pipe( fpu_reg_reg );
4143 %}
4144
4145 instruct Repl4I_zero(vecX dst, immI0 zero) %{
4146   predicate(n->as_Vector()->length() == 4);
4147   match(Set dst (ReplicateI zero));
4148   format %{ "pxor $dst,$dst\t! replicate4I zero" %}
4149   ins_encode %{
4150     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4151   %}
4152   ins_pipe( fpu_reg_reg );
4153 %}
4154
4155 instruct Repl8I_zero(vecY dst, immI0 zero) %{
4156   predicate(n->as_Vector()->length() == 8);
4157   match(Set dst (ReplicateI zero));
4158   format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
4159   ins_encode %{
4160     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
4161     int vector_len = 1;
4162     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4163   %}
4164   ins_pipe( fpu_reg_reg );
4165 %}
4166
4167 // Replicate long (8 byte) scalar to be vector
4168 #ifdef _LP64
4169 instruct Repl2L(vecX dst, rRegL src) %{
4170   predicate(n->as_Vector()->length() == 2);
4171   match(Set dst (ReplicateL src));
4172   format %{ "movdq $dst,$src\n\t"
4173             "punpcklqdq $dst,$dst\t! replicate2L" %}
4174   ins_encode %{
4175     __ movdq($dst$$XMMRegister, $src$$Register);
4176     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4177   %}
4178   ins_pipe( pipe_slow );
4179 %}
4180 #else // _LP64
4181 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
4182   predicate(n->as_Vector()->length() == 2);
4183   match(Set dst (ReplicateL src));
4184   effect(TEMP dst, USE src, TEMP tmp);
4185   format %{ "movdl $dst,$src.lo\n\t"
4186             "movdl $tmp,$src.hi\n\t"
4187             "punpckldq $dst,$tmp\n\t"
4188             "punpcklqdq $dst,$dst\t! replicate2L" %}
4189   ins_encode %{
4190     __ movdl($dst$$XMMRegister, $src$$Register);
4191     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
4192     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
4193     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4194   %}
4195   ins_pipe( pipe_slow );
4196 %}
4197 #endif // _LP64
4198
4199 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
4200 instruct Repl2L_imm(vecX dst, immL con) %{
4201   predicate(n->as_Vector()->length() == 2);
4202   match(Set dst (ReplicateL con));
4203   format %{ "movq $dst,[$constantaddress]\n\t"
4204             "punpcklqdq $dst,$dst\t! replicate2L($con)" %}
4205   ins_encode %{
4206     __ movq($dst$$XMMRegister, $constantaddress($con));
4207     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4208   %}
4209   ins_pipe( pipe_slow );
4210 %}
4211
4212 // Replicate long (8 byte) scalar zero to be vector
4213 instruct Repl2L_zero(vecX dst, immL0 zero) %{
4214   predicate(n->as_Vector()->length() == 2);
4215   match(Set dst (ReplicateL zero));
4216   format %{ "pxor $dst,$dst\t! replicate2L zero" %}
4217   ins_encode %{
4218     __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4219   %}
4220   ins_pipe( fpu_reg_reg );
4221 %}
4222
4223 instruct Repl4L_zero(vecY dst, immL0 zero) %{
4224   predicate(n->as_Vector()->length() == 4);
4225   match(Set dst (ReplicateL zero));
4226   format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
4227   ins_encode %{
4228     // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
4229     int vector_len = 1;
4230     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
4231   %}
4232   ins_pipe( fpu_reg_reg );
4233 %}
4234
4235 // Replicate float (4 byte) scalar to be vector
4236 instruct Repl2F(vecD dst, regF src) %{
4237   predicate(n->as_Vector()->length() == 2);
4238   match(Set dst (ReplicateF src));
4239   format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
4240   ins_encode %{
4241     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
4242   %}
4243   ins_pipe( fpu_reg_reg );
4244 %}
4245
4246 instruct Repl4F(vecX dst, regF src) %{
4247   predicate(n->as_Vector()->length() == 4);
4248   match(Set dst (ReplicateF src));
4249   format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
4250   ins_encode %{
4251     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
4252   %}
4253   ins_pipe( pipe_slow );
4254 %}
4255
4256 // Replicate double (8 bytes) scalar to be vector
4257 instruct Repl2D(vecX dst, regD src) %{
4258   predicate(n->as_Vector()->length() == 2);
4259   match(Set dst (ReplicateD src));
4260   format %{ "pshufd $dst,$src,0x44\t! replicate2D" %}
4261   ins_encode %{
4262     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
4263   %}
4264   ins_pipe( pipe_slow );
4265 %}
4266
4267 // ====================EVEX REPLICATE=============================================
4268
4269 instruct Repl4B_mem_evex(vecS dst, memory mem) %{
4270   predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
4271   match(Set dst (ReplicateB (LoadB mem)));
4272   format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
4273   ins_encode %{
4274     int vector_len = 0;
4275     __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
4276   %}
4277   ins_pipe( pipe_slow );
4278 %}
4279
4280 instruct Repl8B_mem_evex(vecD dst, memory mem) %{
4281   predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
4282   match(Set dst (ReplicateB (LoadB mem)));
4283   format %{ "vpbroadcastb $dst,$mem\t!
replicate8B" %} 4284 ins_encode %{ 4285 int vector_len = 0; 4286 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4287 %} 4288 ins_pipe( pipe_slow ); 4289 %} 4290 4291 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4292 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4293 match(Set dst (ReplicateB src)); 4294 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 4295 ins_encode %{ 4296 int vector_len = 0; 4297 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4298 %} 4299 ins_pipe( pipe_slow ); 4300 %} 4301 4302 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4303 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4304 match(Set dst (ReplicateB (LoadB mem))); 4305 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4306 ins_encode %{ 4307 int vector_len = 0; 4308 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4309 %} 4310 ins_pipe( pipe_slow ); 4311 %} 4312 4313 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4314 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4315 match(Set dst (ReplicateB src)); 4316 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4317 ins_encode %{ 4318 int vector_len = 1; 4319 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4320 %} 4321 ins_pipe( pipe_slow ); 4322 %} 4323 4324 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4325 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4326 match(Set dst (ReplicateB (LoadB mem))); 4327 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4328 ins_encode %{ 4329 int vector_len = 1; 4330 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4331 %} 4332 ins_pipe( pipe_slow ); 4333 %} 4334 4335 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4336 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4337 match(Set dst (ReplicateB src)); 4338 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 4339 ins_encode %{ 4340 int vector_len = 2; 4341 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4342 %} 4343 ins_pipe( pipe_slow ); 4344 %} 4345 4346 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4347 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4348 match(Set dst (ReplicateB (LoadB mem))); 4349 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4350 ins_encode %{ 4351 int vector_len = 2; 4352 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4353 %} 4354 ins_pipe( pipe_slow ); 4355 %} 4356 4357 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4358 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4359 match(Set dst (ReplicateB con)); 4360 format %{ "movq $dst,[$constantaddress]\n\t" 4361 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4362 ins_encode %{ 4363 int vector_len = 0; 4364 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4365 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4366 %} 4367 ins_pipe( pipe_slow ); 4368 %} 4369 4370 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4371 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4372 match(Set dst (ReplicateB con)); 4373 format %{ "movq $dst,[$constantaddress]\n\t" 4374 "vpbroadcastb $dst,$dst\t! 
replicate32B" %} 4375 ins_encode %{ 4376 int vector_len = 1; 4377 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4378 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4379 %} 4380 ins_pipe( pipe_slow ); 4381 %} 4382 4383 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4384 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4385 match(Set dst (ReplicateB con)); 4386 format %{ "movq $dst,[$constantaddress]\n\t" 4387 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4388 ins_encode %{ 4389 int vector_len = 2; 4390 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4391 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4392 %} 4393 ins_pipe( pipe_slow ); 4394 %} 4395 4396 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4397 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4398 match(Set dst (ReplicateB zero)); 4399 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4400 ins_encode %{ 4401 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4402 int vector_len = 2; 4403 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4404 %} 4405 ins_pipe( fpu_reg_reg ); 4406 %} 4407 4408 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4409 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4410 match(Set dst (ReplicateS src)); 4411 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 4412 ins_encode %{ 4413 int vector_len = 0; 4414 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4415 %} 4416 ins_pipe( pipe_slow ); 4417 %} 4418 4419 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4420 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4421 match(Set dst (ReplicateS (LoadS mem))); 4422 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4423 ins_encode %{ 4424 int vector_len = 0; 4425 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4426 %} 4427 ins_pipe( pipe_slow ); 4428 %} 4429 4430 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4431 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4432 match(Set dst (ReplicateS src)); 4433 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4434 ins_encode %{ 4435 int vector_len = 0; 4436 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4437 %} 4438 ins_pipe( pipe_slow ); 4439 %} 4440 4441 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4442 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4443 match(Set dst (ReplicateS (LoadS mem))); 4444 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4445 ins_encode %{ 4446 int vector_len = 0; 4447 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4448 %} 4449 ins_pipe( pipe_slow ); 4450 %} 4451 4452 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4453 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4454 match(Set dst (ReplicateS src)); 4455 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4456 ins_encode %{ 4457 int vector_len = 1; 4458 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4459 %} 4460 ins_pipe( pipe_slow ); 4461 %} 4462 4463 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4464 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4465 match(Set dst (ReplicateS (LoadS mem))); 4466 format %{ "vpbroadcastw $dst,$mem\t! 
replicate16S" %} 4467 ins_encode %{ 4468 int vector_len = 1; 4469 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4470 %} 4471 ins_pipe( pipe_slow ); 4472 %} 4473 4474 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4475 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4476 match(Set dst (ReplicateS src)); 4477 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4478 ins_encode %{ 4479 int vector_len = 2; 4480 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4481 %} 4482 ins_pipe( pipe_slow ); 4483 %} 4484 4485 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4486 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4487 match(Set dst (ReplicateS (LoadS mem))); 4488 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4489 ins_encode %{ 4490 int vector_len = 2; 4491 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4492 %} 4493 ins_pipe( pipe_slow ); 4494 %} 4495 4496 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4497 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4498 match(Set dst (ReplicateS con)); 4499 format %{ "movq $dst,[$constantaddress]\n\t" 4500 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4501 ins_encode %{ 4502 int vector_len = 0; 4503 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4504 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4505 %} 4506 ins_pipe( pipe_slow ); 4507 %} 4508 4509 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4510 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4511 match(Set dst (ReplicateS con)); 4512 format %{ "movq $dst,[$constantaddress]\n\t" 4513 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4514 ins_encode %{ 4515 int vector_len = 1; 4516 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4517 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4518 %} 4519 ins_pipe( pipe_slow ); 4520 %} 4521 4522 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4523 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4524 match(Set dst (ReplicateS con)); 4525 format %{ "movq $dst,[$constantaddress]\n\t" 4526 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4527 ins_encode %{ 4528 int vector_len = 2; 4529 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4530 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4531 %} 4532 ins_pipe( pipe_slow ); 4533 %} 4534 4535 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4536 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4537 match(Set dst (ReplicateS zero)); 4538 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4539 ins_encode %{ 4540 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4541 int vector_len = 2; 4542 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4543 %} 4544 ins_pipe( fpu_reg_reg ); 4545 %} 4546 4547 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4548 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4549 match(Set dst (ReplicateI src)); 4550 format %{ "vpbroadcastd $dst,$src\t! 
replicate4I" %} 4551 ins_encode %{ 4552 int vector_len = 0; 4553 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4554 %} 4555 ins_pipe( pipe_slow ); 4556 %} 4557 4558 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4559 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4560 match(Set dst (ReplicateI (LoadI mem))); 4561 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4562 ins_encode %{ 4563 int vector_len = 0; 4564 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4565 %} 4566 ins_pipe( pipe_slow ); 4567 %} 4568 4569 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4570 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4571 match(Set dst (ReplicateI src)); 4572 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4573 ins_encode %{ 4574 int vector_len = 1; 4575 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4576 %} 4577 ins_pipe( pipe_slow ); 4578 %} 4579 4580 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4581 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4582 match(Set dst (ReplicateI (LoadI mem))); 4583 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4584 ins_encode %{ 4585 int vector_len = 1; 4586 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4587 %} 4588 ins_pipe( pipe_slow ); 4589 %} 4590 4591 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4592 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4593 match(Set dst (ReplicateI src)); 4594 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4595 ins_encode %{ 4596 int vector_len = 2; 4597 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4598 %} 4599 ins_pipe( pipe_slow ); 4600 %} 4601 4602 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4603 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4604 match(Set dst (ReplicateI (LoadI mem))); 4605 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4606 ins_encode %{ 4607 int vector_len = 2; 4608 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4609 %} 4610 ins_pipe( pipe_slow ); 4611 %} 4612 4613 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4614 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4615 match(Set dst (ReplicateI con)); 4616 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4617 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4618 ins_encode %{ 4619 int vector_len = 0; 4620 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4621 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4622 %} 4623 ins_pipe( pipe_slow ); 4624 %} 4625 4626 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4627 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4628 match(Set dst (ReplicateI con)); 4629 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4630 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4631 ins_encode %{ 4632 int vector_len = 1; 4633 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4634 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4635 %} 4636 ins_pipe( pipe_slow ); 4637 %} 4638 4639 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4640 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4641 match(Set dst (ReplicateI con)); 4642 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4643 "vpbroadcastd $dst,$dst\t! 
replicate16I" %} 4644 ins_encode %{ 4645 int vector_len = 2; 4646 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4647 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4648 %} 4649 ins_pipe( pipe_slow ); 4650 %} 4651 4652 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4653 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4654 match(Set dst (ReplicateI zero)); 4655 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4656 ins_encode %{ 4657 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4658 int vector_len = 2; 4659 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4660 %} 4661 ins_pipe( fpu_reg_reg ); 4662 %} 4663 4664 // Replicate long (8 byte) scalar to be vector 4665 #ifdef _LP64 4666 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4667 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4668 match(Set dst (ReplicateL src)); 4669 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4670 ins_encode %{ 4671 int vector_len = 1; 4672 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4673 %} 4674 ins_pipe( pipe_slow ); 4675 %} 4676 4677 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4678 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4679 match(Set dst (ReplicateL src)); 4680 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4681 ins_encode %{ 4682 int vector_len = 2; 4683 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4684 %} 4685 ins_pipe( pipe_slow ); 4686 %} 4687 #else // _LP64 4688 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4689 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4690 match(Set dst (ReplicateL src)); 4691 effect(TEMP dst, USE src, TEMP tmp); 4692 format %{ "movdl $dst,$src.lo\n\t" 4693 "movdl $tmp,$src.hi\n\t" 4694 "punpckldq $dst,$tmp\n\t" 4695 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4696 ins_encode %{ 4697 int vector_len = 1; 4698 __ movdl($dst$$XMMRegister, $src$$Register); 4699 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4700 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4701 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4702 %} 4703 ins_pipe( pipe_slow ); 4704 %} 4705 4706 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4707 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4708 match(Set dst (ReplicateL src)); 4709 effect(TEMP dst, USE src, TEMP tmp); 4710 format %{ "movdl $dst,$src.lo\n\t" 4711 "movdl $tmp,$src.hi\n\t" 4712 "punpckldq $dst,$tmp\n\t" 4713 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4714 ins_encode %{ 4715 int vector_len = 2; 4716 __ movdl($dst$$XMMRegister, $src$$Register); 4717 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4718 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4719 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4720 %} 4721 ins_pipe( pipe_slow ); 4722 %} 4723 #endif // _LP64 4724 4725 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4726 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4727 match(Set dst (ReplicateL con)); 4728 format %{ "movq $dst,[$constantaddress]\n\t" 4729 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4730 ins_encode %{ 4731 int vector_len = 1; 4732 __ movq($dst$$XMMRegister, $constantaddress($con)); 4733 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4734 %} 4735 ins_pipe( pipe_slow ); 4736 %} 4737 4738 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4739 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4740 match(Set dst (ReplicateL con)); 4741 format %{ "movq $dst,[$constantaddress]\n\t" 4742 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4743 ins_encode %{ 4744 int vector_len = 2; 4745 __ movq($dst$$XMMRegister, $constantaddress($con)); 4746 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4747 %} 4748 ins_pipe( pipe_slow ); 4749 %} 4750 4751 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4752 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4753 match(Set dst (ReplicateL (LoadL mem))); 4754 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4755 ins_encode %{ 4756 int vector_len = 0; 4757 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4758 %} 4759 ins_pipe( pipe_slow ); 4760 %} 4761 4762 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4763 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4764 match(Set dst (ReplicateL (LoadL mem))); 4765 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4766 ins_encode %{ 4767 int vector_len = 1; 4768 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4769 %} 4770 ins_pipe( pipe_slow ); 4771 %} 4772 4773 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4774 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4775 match(Set dst (ReplicateL (LoadL mem))); 4776 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4777 ins_encode %{ 4778 int vector_len = 2; 4779 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4780 %} 4781 ins_pipe( pipe_slow ); 4782 %} 4783 4784 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4785 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4786 match(Set dst (ReplicateL zero)); 4787 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4788 ins_encode %{ 4789 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4790 int vector_len = 2; 4791 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4792 %} 4793 ins_pipe( fpu_reg_reg ); 4794 %} 4795 4796 instruct Repl8F_evex(vecY dst, regF src) %{ 4797 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4798 match(Set dst (ReplicateF src)); 4799 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4800 ins_encode %{ 4801 int vector_len = 1; 4802 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4803 %} 4804 ins_pipe( pipe_slow ); 4805 %} 4806 4807 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4808 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4809 match(Set dst (ReplicateF (LoadF mem))); 4810 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4811 ins_encode %{ 4812 int vector_len = 1; 4813 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4814 %} 4815 ins_pipe( pipe_slow ); 4816 %} 4817 4818 instruct Repl16F_evex(vecZ dst, regF src) %{ 4819 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4820 match(Set dst (ReplicateF src)); 4821 format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %} 4822 ins_encode %{ 4823 int vector_len = 2; 4824 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4825 %} 4826 ins_pipe( pipe_slow ); 4827 %} 4828 4829 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4830 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4831 match(Set dst (ReplicateF (LoadF mem))); 4832 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4833 ins_encode %{ 4834 int vector_len = 2; 4835 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4836 %} 4837 ins_pipe( pipe_slow ); 4838 %} 4839 4840 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4841 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4842 match(Set dst (ReplicateF zero)); 4843 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4844 ins_encode %{ 4845 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4846 int vector_len = 2; 4847 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4848 %} 4849 ins_pipe( fpu_reg_reg ); 4850 %} 4851 4852 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4853 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4854 match(Set dst (ReplicateF zero)); 4855 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4856 ins_encode %{ 4857 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4858 int vector_len = 2; 4859 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4860 %} 4861 ins_pipe( fpu_reg_reg ); 4862 %} 4863 4864 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4865 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4866 match(Set dst (ReplicateF zero)); 4867 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4868 ins_encode %{ 4869 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4870 int vector_len = 2; 4871 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4872 %} 4873 ins_pipe( fpu_reg_reg ); 4874 %} 4875 4876 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4877 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4878 match(Set dst (ReplicateF zero)); 4879 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4880 ins_encode %{ 4881 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4882 int vector_len = 2; 4883 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4884 %} 4885 ins_pipe( fpu_reg_reg ); 4886 %} 4887 4888 instruct Repl4D_evex(vecY dst, regD src) %{ 4889 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4890 match(Set dst (ReplicateD src)); 4891 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4892 ins_encode %{ 4893 int vector_len = 1; 4894 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4895 %} 4896 ins_pipe( pipe_slow ); 4897 %} 4898 4899 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4900 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4901 match(Set dst (ReplicateD (LoadD mem))); 4902 format %{ "vbroadcastsd $dst,$mem\t! 
replicate4D" %} 4903 ins_encode %{ 4904 int vector_len = 1; 4905 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4906 %} 4907 ins_pipe( pipe_slow ); 4908 %} 4909 4910 instruct Repl8D_evex(vecZ dst, regD src) %{ 4911 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4912 match(Set dst (ReplicateD src)); 4913 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4914 ins_encode %{ 4915 int vector_len = 2; 4916 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4917 %} 4918 ins_pipe( pipe_slow ); 4919 %} 4920 4921 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4922 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4923 match(Set dst (ReplicateD (LoadD mem))); 4924 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4925 ins_encode %{ 4926 int vector_len = 2; 4927 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4928 %} 4929 ins_pipe( pipe_slow ); 4930 %} 4931 4932 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4933 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4934 match(Set dst (ReplicateD zero)); 4935 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4936 ins_encode %{ 4937 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4938 int vector_len = 2; 4939 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4940 %} 4941 ins_pipe( fpu_reg_reg ); 4942 %} 4943 4944 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4945 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4946 match(Set dst (ReplicateD zero)); 4947 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4948 ins_encode %{ 4949 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4950 int vector_len = 2; 4951 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4952 %} 4953 ins_pipe( fpu_reg_reg ); 4954 %} 4955 4956 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4957 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4958 match(Set dst (ReplicateD zero)); 4959 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4960 ins_encode %{ 4961 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4962 int vector_len = 2; 4963 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4964 %} 4965 ins_pipe( fpu_reg_reg ); 4966 %} 4967 4968 // ====================REDUCTION ARITHMETIC======================================= 4969 4970 instruct rsadd8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 4971 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 4972 match(Set dst (AddReductionVI src1 src2)); 4973 effect(TEMP tmp, TEMP tmp2, TEMP dst); 4974 format %{ 4975 "pshufd $tmp,$src2,0x1\n\t" 4976 "paddb $tmp,$src2\n\t" 4977 "movzbl $dst,$src1\n\t" 4978 "pextrb $tmp2,$tmp, 0x0\n\t" 4979 "addb $dst,$tmp2\n\t" 4980 "pextrb $tmp2,$tmp, 0x1\n\t" 4981 "addb $dst,$tmp2\n\t" 4982 "pextrb $tmp2,$tmp, 0x2\n\t" 4983 "addb $dst,$tmp2\n\t" 4984 "pextrb $tmp2,$tmp, 0x3\n\t" 4985 "addb $dst,$tmp2\n\t" 4986 "movsbl $dst,$dst\t! 
4970 instruct rsadd8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 4971 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 4972 match(Set dst (AddReductionVI src1 src2)); 4973 effect(TEMP tmp, TEMP tmp2, TEMP dst); 4974 format %{ 4975 "pshufd $tmp,$src2,0x1\n\t" 4976 "paddb $tmp,$src2\n\t" 4977 "movzbl $dst,$src1\n\t" 4978 "pextrb $tmp2,$tmp, 0x0\n\t" 4979 "addb $dst,$tmp2\n\t" 4980 "pextrb $tmp2,$tmp, 0x1\n\t" 4981 "addb $dst,$tmp2\n\t" 4982 "pextrb $tmp2,$tmp, 0x2\n\t" 4983 "addb $dst,$tmp2\n\t" 4984 "pextrb $tmp2,$tmp, 0x3\n\t" 4985 "addb $dst,$tmp2\n\t" 4986 "movsbl $dst,$dst\t! add reduction8B" %} 4987 ins_encode %{ 4988 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 4989 __ paddb($tmp$$XMMRegister, $src2$$XMMRegister); 4990 __ movzbl($dst$$Register, $src1$$Register); 4991 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); 4992 __ addb($dst$$Register, $tmp2$$Register); 4993 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); 4994 __ addb($dst$$Register, $tmp2$$Register); 4995 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); 4996 __ addb($dst$$Register, $tmp2$$Register); 4997 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); 4998 __ addb($dst$$Register, $tmp2$$Register); 4999 __ movsbl($dst$$Register, $dst$$Register); 5000 %} 5001 ins_pipe( pipe_slow ); 5002 %} 5003 5004 instruct rsadd16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ 5005 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 5006 match(Set dst (AddReductionVI src1 src2)); 5007 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 5008 format %{ "pshufd $tmp,$src2,0xE\n\t" 5009 "paddb $tmp,$src2\n\t" 5010 "pshufd $tmp2,$tmp,0x1\n\t" 5011 "paddb $tmp,$tmp2\n\t" 5012 "movzbl $dst,$src1\n\t" 5013 "pextrb $tmp3,$tmp, 0x0\n\t" 5014 "addb $dst,$tmp3\n\t" 5015 "pextrb $tmp3,$tmp, 0x1\n\t" 5016 "addb $dst,$tmp3\n\t" 5017 "pextrb $tmp3,$tmp, 0x2\n\t" 5018 "addb $dst,$tmp3\n\t" 5019 "pextrb $tmp3,$tmp, 0x3\n\t" 5020 "addb $dst,$tmp3\n\t" 5021 "movsbl $dst,$dst\t! add reduction16B" %} 5022 ins_encode %{ 5023 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5024 __ paddb($tmp$$XMMRegister, $src2$$XMMRegister); 5025 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5026 __ paddb($tmp$$XMMRegister, $tmp2$$XMMRegister); 5027 __ movzbl($dst$$Register, $src1$$Register); 5028 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 5029 __ addb($dst$$Register, $tmp3$$Register); 5030 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 5031 __ addb($dst$$Register, $tmp3$$Register); 5032 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 5033 __ addb($dst$$Register, $tmp3$$Register); 5034 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 5035 __ addb($dst$$Register, $tmp3$$Register); 5036 __ movsbl($dst$$Register, $dst$$Register); 5037 %} 5038 ins_pipe( pipe_slow ); 5039 %} 5040 5041 instruct rvadd32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 5042 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 5043 match(Set dst (AddReductionVI src1 src2)); 5044 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 5045 format %{ "vextracti128_high $tmp,$src2\n\t" 5046 "vpaddb $tmp,$tmp,$src2\n\t" 5047 "pshufd $tmp2,$tmp,0xE\n\t" 5048 "vpaddb $tmp,$tmp,$tmp2\n\t" 5049 "pshufd $tmp2,$tmp,0x1\n\t" 5050 "vpaddb $tmp,$tmp,$tmp2\n\t" 5051 "movzbl $dst,$src1\n\t" 5052 "pextrb $tmp3,$tmp, 0x0\n\t" 5053 "addb $dst,$tmp3\n\t" 5054 "pextrb $tmp3,$tmp, 0x1\n\t" 5055 "addb $dst,$tmp3\n\t" 5056 "pextrb $tmp3,$tmp, 0x2\n\t" 5057 "addb $dst,$tmp3\n\t" 5058 "pextrb $tmp3,$tmp, 0x3\n\t" 5059 "addb $dst,$tmp3\n\t" 5060 "movsbl $dst,$dst\t!
add reduction32B" %} 5061 ins_encode %{ 5062 int vector_len = 0; 5063 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5064 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5065 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5066 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5067 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5068 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5069 __ movzbl($dst$$Register, $src1$$Register); 5070 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 5071 __ addb($dst$$Register, $tmp3$$Register); 5072 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 5073 __ addb($dst$$Register, $tmp3$$Register); 5074 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 5075 __ addb($dst$$Register, $tmp3$$Register); 5076 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 5077 __ addb($dst$$Register, $tmp3$$Register); 5078 __ movsbl($dst$$Register, $dst$$Register); 5079 %} 5080 ins_pipe( pipe_slow ); 5081 %} 5082 5083 instruct rvadd64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 5084 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 5085 match(Set dst (AddReductionVI src1 src2)); 5086 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 5087 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5088 "vpaddb $tmp2,$tmp2,$src2\n\t" 5089 "vextracti128_high $tmp,$tmp2\n\t" 5090 "vpaddb $tmp,$tmp,$tmp2\n\t" 5091 "pshufd $tmp2,$tmp,0xE\n\t" 5092 "vpaddb $tmp,$tmp,$tmp2\n\t" 5093 "pshufd $tmp2,$tmp,0x1\n\t" 5094 "vpaddb $tmp,$tmp,$tmp2\n\t" 5095 "movzbl $dst,$src1\n\t" 5096 "movdl $tmp3,$tmp\n\t" 5097 "addb $dst,$tmp3\n\t" 5098 "shrl $tmp3,0x8\n\t" 5099 "addb $dst,$tmp3\n\t" 5100 "shrl $tmp3,0x8\n\t" 5101 "addb $dst,$tmp3\n\t" 5102 "shrl $tmp3,0x8\n\t" 5103 "addb $dst,$tmp3\n\t" 5104 "movsbl $dst,$dst\t! 
add reduction64B" %} 5105 ins_encode %{ 5106 int vector_len = 0; 5107 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5108 __ vpaddb($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5109 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5110 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5111 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5112 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5113 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5114 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5115 __ movzbl($dst$$Register, $src1$$Register); 5116 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 5117 __ addb($dst$$Register, $tmp3$$Register); 5118 __ shrl($tmp3$$Register, 8); 5119 __ addb($dst$$Register, $tmp3$$Register); 5120 __ shrl($tmp3$$Register, 8); 5121 __ addb($dst$$Register, $tmp3$$Register); 5122 __ shrl($tmp3$$Register, 8); 5123 __ addb($dst$$Register, $tmp3$$Register); 5124 __ movsbl($dst$$Register, $dst$$Register); 5125 %} 5126 ins_pipe( pipe_slow ); 5127 %} 5128
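// The byte reductions above accumulate with addb and finish with movsbl, so
// the scalar result wraps to the element width and is then sign-extended
// into the 32-bit register; the short reductions below do the same with addw
// and a final movswl.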
5129 instruct rsadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 5130 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5131 match(Set dst (AddReductionVI src1 src2)); 5132 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5133 format %{ 5134 "movdqu $tmp,$src2\n\t" 5135 "phaddw $tmp,$tmp\n\t" 5136 "phaddw $tmp,$tmp\n\t" 5137 "movzwl $dst,$src1\n\t" 5138 "pextrw $tmp2,$tmp, 0x0\n\t" 5139 "addw $dst,$tmp2\n\t" 5140 "movswl $dst,$dst\t! add reduction4S" %} 5141 ins_encode %{ 5142 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5143 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5144 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5145 __ movzwl($dst$$Register, $src1$$Register); 5146 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5147 __ addw($dst$$Register, $tmp2$$Register); 5148 __ movswl($dst$$Register, $dst$$Register); 5149 %} 5150 ins_pipe( pipe_slow ); 5151 %} 5152 5153 instruct rvadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 5154 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5155 match(Set dst (AddReductionVI src1 src2)); 5156 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5157 format %{ "vphaddw $tmp,$src2,$src2\n\t" 5158 "vphaddw $tmp,$tmp,$tmp\n\t" 5159 "movzwl $dst,$src1\n\t" 5160 "pextrw $tmp2,$tmp, 0x0\n\t" 5161 "addw $dst,$tmp2\n\t" 5162 "movswl $dst,$dst\t! add reduction4S" %} 5163 ins_encode %{ 5164 int vector_len = 0; 5165 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5166 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5167 __ movzwl($dst$$Register, $src1$$Register); 5168 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5169 __ addw($dst$$Register, $tmp2$$Register); 5170 __ movswl($dst$$Register, $dst$$Register); 5171 %} 5172 ins_pipe( pipe_slow ); 5173 %} 5174 5175 instruct rsadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ 5176 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5177 match(Set dst (AddReductionVI src1 src2)); 5178 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5179 format %{ 5180 "movdqu $tmp,$src2\n\t" 5181 "phaddw $tmp,$tmp\n\t" 5182 "phaddw $tmp,$tmp\n\t" 5183 "phaddw $tmp,$tmp\n\t" 5184 "movzwl $dst,$src1\n\t" 5185 "pextrw $tmp2,$tmp, 0x0\n\t" 5186 "addw $dst,$tmp2\n\t" 5187 "movswl $dst,$dst\t! add reduction8S" %} 5188 ins_encode %{ 5189 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5190 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5191 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5192 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5193 __ movzwl($dst$$Register, $src1$$Register); 5194 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5195 __ addw($dst$$Register, $tmp2$$Register); 5196 __ movswl($dst$$Register, $dst$$Register); 5197 %} 5198 ins_pipe( pipe_slow ); 5199 %} 5200 5201 instruct rvadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ 5202 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5203 match(Set dst (AddReductionVI src1 src2)); 5204 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5205 format %{ "vphaddw $tmp,$src2,$src2\n\t" 5206 "vphaddw $tmp,$tmp,$tmp\n\t" 5207 "vphaddw $tmp,$tmp,$tmp\n\t" 5208 "movzwl $dst,$src1\n\t" 5209 "pextrw $tmp2,$tmp, 0x0\n\t" 5210 "addw $dst,$tmp2\n\t" 5211 "movswl $dst,$dst\t! add reduction8S" %} 5212 ins_encode %{ 5213 int vector_len = 0; 5214 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5215 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5216 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5217 __ movzwl($dst$$Register, $src1$$Register); 5218 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5219 __ addw($dst$$Register, $tmp2$$Register); 5220 __ movswl($dst$$Register, $dst$$Register); 5221 %} 5222 ins_pipe( pipe_slow ); 5223 %} 5224
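// vphaddw/vphaddd on 256-bit operands add horizontally within each 128-bit
// lane separately; the 16S variant below therefore interleaves the two lanes
// with vpermq $tmp,$tmp,0xD8 after the first horizontal add, while the
// 512-bit EVEX variant avoids vphaddw entirely and folds halves with
// vextract* plus vertical vpaddw.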
5225 instruct rvadd16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2) %{ 5226 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5227 match(Set dst (AddReductionVI src1 src2)); 5228 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5229 format %{ "vphaddw $tmp,$src2,$src2\n\t" "vpermq $tmp,$tmp,0xD8\n\t" 5230 "vphaddw $tmp,$tmp,$tmp\n\t" 5231 "vphaddw $tmp,$tmp,$tmp\n\t" 5232 "vphaddw $tmp,$tmp,$tmp\n\t" 5233 "movzwl $dst,$src1\n\t" 5234 "pextrw $tmp2,$tmp, 0x0\n\t" 5235 "addw $dst,$tmp2\n\t" 5236 "movswl $dst,$dst\t! add reduction16S" %} 5237 ins_encode %{ 5238 int vector_len = 1; 5239 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5240 __ vpermq($tmp$$XMMRegister, $tmp$$XMMRegister, 0xD8, vector_len); 5241 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5242 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5243 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5244 __ movzwl($dst$$Register, $src1$$Register); 5245 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5246 __ addw($dst$$Register, $tmp2$$Register); 5247 __ movswl($dst$$Register, $dst$$Register); 5248 %} 5249 ins_pipe( pipe_slow ); 5250 %} 5251 5252 instruct rvadd32S_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 5253 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5254 match(Set dst (AddReductionVI src1 src2)); 5255 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 5256 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5257 "vpaddw $tmp2,$tmp2,$src2\n\t" 5258 "vextracti128_high $tmp,$tmp2\n\t" 5259 "vpaddw $tmp,$tmp,$tmp2\n\t" 5260 "pshufd $tmp2,$tmp,0xE\n\t" 5261 "vpaddw $tmp,$tmp,$tmp2\n\t" 5262 "pshufd $tmp2,$tmp,0x1\n\t" 5263 "vpaddw $tmp,$tmp,$tmp2\n\t" "movzwl $dst,$src1\n\t" 5264 "movdl $tmp3,$tmp\n\t" 5265 "addw $dst,$tmp3\n\t" 5266 "shrl $tmp3,0x10\n\t" 5267 "addw $dst,$tmp3\n\t" 5268 "movswl $dst,$dst\t! add reduction32S" %} 5269 ins_encode %{ 5270 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5271 __ vpaddw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5272 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5273 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5274 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5275 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5276 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5277 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); __ movzwl($dst$$Register, $src1$$Register); 5278 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 5279 __ addw($dst$$Register, $tmp3$$Register); 5280 __ shrl($tmp3$$Register, 16); 5281 __ addw($dst$$Register, $tmp3$$Register); 5282 __ movswl($dst$$Register, $dst$$Register); 5283 %} 5284 ins_pipe( pipe_slow ); 5285 %} 5286 5287 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5288 predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5289 match(Set dst (AddReductionVI src1 src2)); 5290 effect(TEMP tmp2, TEMP tmp); 5291 format %{ "movdqu $tmp2,$src2\n\t" 5292 "phaddd $tmp2,$tmp2\n\t" 5293 "movd $tmp,$src1\n\t" 5294 "paddd $tmp,$tmp2\n\t" 5295 "movd $dst,$tmp\t!
add reduction2I" %} 5296 ins_encode %{ 5297 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 5298 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 5299 __ movdl($tmp$$XMMRegister, $src1$$Register); 5300 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 5301 __ movdl($dst$$Register, $tmp$$XMMRegister); 5302 %} 5303 ins_pipe( pipe_slow ); 5304 %} 5305 5306 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5307 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5308 match(Set dst (AddReductionVI src1 src2)); 5309 effect(TEMP tmp, TEMP tmp2); 5310 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5311 "movd $tmp2,$src1\n\t" 5312 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5313 "movd $dst,$tmp2\t! add reduction2I" %} 5314 ins_encode %{ 5315 int vector_len = 0; 5316 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5317 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5318 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 5319 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5320 %} 5321 ins_pipe( pipe_slow ); 5322 %} 5323 5324 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5325 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5326 match(Set dst (AddReductionVI src1 src2)); 5327 effect(TEMP tmp, TEMP tmp2); 5328 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5329 "vpaddd $tmp,$src2,$tmp2\n\t" 5330 "movd $tmp2,$src1\n\t" 5331 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5332 "movd $dst,$tmp2\t! add reduction2I" %} 5333 ins_encode %{ 5334 int vector_len = 0; 5335 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5336 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5337 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5338 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5339 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5340 %} 5341 ins_pipe( pipe_slow ); 5342 %} 5343 5344 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5345 predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5346 match(Set dst (AddReductionVI src1 src2)); 5347 effect(TEMP tmp, TEMP tmp2); 5348 format %{ "movdqu $tmp,$src2\n\t" 5349 "phaddd $tmp,$tmp\n\t" 5350 "phaddd $tmp,$tmp\n\t" 5351 "movd $tmp2,$src1\n\t" 5352 "paddd $tmp2,$tmp\n\t" 5353 "movd $dst,$tmp2\t! add reduction4I" %} 5354 ins_encode %{ 5355 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5356 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5357 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5358 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5359 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 5360 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5361 %} 5362 ins_pipe( pipe_slow ); 5363 %} 5364 5365 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5366 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5367 match(Set dst (AddReductionVI src1 src2)); 5368 effect(TEMP tmp, TEMP tmp2); 5369 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5370 "vphaddd $tmp,$tmp,$tmp\n\t" 5371 "movd $tmp2,$src1\n\t" 5372 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5373 "movd $dst,$tmp2\t! 
add reduction4I" %} 5374 ins_encode %{ 5375 int vector_len = 0; 5376 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5377 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5378 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5379 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 5380 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5381 %} 5382 ins_pipe( pipe_slow ); 5383 %} 5384 5385 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5386 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5387 match(Set dst (AddReductionVI src1 src2)); 5388 effect(TEMP tmp, TEMP tmp2); 5389 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5390 "vpaddd $tmp,$src2,$tmp2\n\t" 5391 "pshufd $tmp2,$tmp,0x1\n\t" 5392 "vpaddd $tmp,$tmp,$tmp2\n\t" 5393 "movd $tmp2,$src1\n\t" 5394 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5395 "movd $dst,$tmp2\t! add reduction4I" %} 5396 ins_encode %{ 5397 int vector_len = 0; 5398 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5399 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5400 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5401 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5402 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5403 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5404 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5405 %} 5406 ins_pipe( pipe_slow ); 5407 %} 5408 5409 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5410 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5411 match(Set dst (AddReductionVI src1 src2)); 5412 effect(TEMP tmp, TEMP tmp2); 5413 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5414 "vphaddd $tmp,$tmp,$tmp2\n\t" 5415 "vextracti128_high $tmp2,$tmp\n\t" 5416 "vpaddd $tmp,$tmp,$tmp2\n\t" 5417 "movd $tmp2,$src1\n\t" 5418 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5419 "movd $dst,$tmp2\t! add reduction8I" %} 5420 ins_encode %{ 5421 int vector_len = 1; 5422 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5423 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5424 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 5425 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5426 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5427 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5428 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5429 %} 5430 ins_pipe( pipe_slow ); 5431 %} 5432 5433 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5434 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5435 match(Set dst (AddReductionVI src1 src2)); 5436 effect(TEMP tmp, TEMP tmp2); 5437 format %{ "vextracti128_high $tmp,$src2\n\t" 5438 "vpaddd $tmp,$tmp,$src2\n\t" 5439 "pshufd $tmp2,$tmp,0xE\n\t" 5440 "vpaddd $tmp,$tmp,$tmp2\n\t" 5441 "pshufd $tmp2,$tmp,0x1\n\t" 5442 "vpaddd $tmp,$tmp,$tmp2\n\t" 5443 "movd $tmp2,$src1\n\t" 5444 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5445 "movd $dst,$tmp2\t! 
add reduction8I" %} 5446 ins_encode %{ 5447 int vector_len = 0; 5448 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5449 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5450 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5451 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5452 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5453 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5454 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5455 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5456 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5457 %} 5458 ins_pipe( pipe_slow ); 5459 %} 5460 5461 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5462 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5463 match(Set dst (AddReductionVI src1 src2)); 5464 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5465 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5466 "vpaddd $tmp3,$tmp3,$src2\n\t" 5467 "vextracti128_high $tmp,$tmp3\n\t" 5468 "vpaddd $tmp,$tmp,$tmp3\n\t" 5469 "pshufd $tmp2,$tmp,0xE\n\t" 5470 "vpaddd $tmp,$tmp,$tmp2\n\t" 5471 "pshufd $tmp2,$tmp,0x1\n\t" 5472 "vpaddd $tmp,$tmp,$tmp2\n\t" 5473 "movd $tmp2,$src1\n\t" 5474 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5475 "movd $dst,$tmp2\t! add reduction16I" %} 5476 ins_encode %{ 5477 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5478 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5479 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5480 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5481 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5482 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5483 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5484 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5485 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5486 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5487 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5488 %} 5489 ins_pipe( pipe_slow ); 5490 %} 5491
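// The long (AddReductionVL) reductions are 64-bit only: movdq moves a full
// 64-bit value between a general purpose register and an XMM register, which
// requires 64-bit GPRs, hence the #ifdef _LP64 guard below.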
5492 #ifdef _LP64 5493 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5494 predicate(UseAVX > 2); 5495 match(Set dst (AddReductionVL src1 src2)); 5496 effect(TEMP tmp, TEMP tmp2); 5497 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5498 "vpaddq $tmp,$src2,$tmp2\n\t" 5499 "movdq $tmp2,$src1\n\t" 5500 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5501 "movdq $dst,$tmp2\t! add reduction2L" %} 5502 ins_encode %{ 5503 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5504 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5505 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5506 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5507 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5508 %} 5509 ins_pipe( pipe_slow ); 5510 %} 5511 5512 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5513 predicate(UseAVX > 2); 5514 match(Set dst (AddReductionVL src1 src2)); 5515 effect(TEMP tmp, TEMP tmp2); 5516 format %{ "vextracti128_high $tmp,$src2\n\t" 5517 "vpaddq $tmp2,$tmp,$src2\n\t" 5518 "pshufd $tmp,$tmp2,0xE\n\t" 5519 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5520 "movdq $tmp,$src1\n\t" 5521 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5522 "movdq $dst,$tmp2\t! add reduction4L" %} 5523 ins_encode %{ 5524 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5525 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5526 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5527 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5528 __ movdq($tmp$$XMMRegister, $src1$$Register); 5529 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5530 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5531 %} 5532 ins_pipe( pipe_slow ); 5533 %} 5534 5535 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5536 predicate(UseAVX > 2); 5537 match(Set dst (AddReductionVL src1 src2)); 5538 effect(TEMP tmp, TEMP tmp2); 5539 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5540 "vpaddq $tmp2,$tmp2,$src2\n\t" 5541 "vextracti128_high $tmp,$tmp2\n\t" 5542 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5543 "pshufd $tmp,$tmp2,0xE\n\t" 5544 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5545 "movdq $tmp,$src1\n\t" 5546 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5547 "movdq $dst,$tmp2\t! add reduction8L" %} 5548 ins_encode %{ 5549 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5550 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5551 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5552 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5553 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5554 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5555 __ movdq($tmp$$XMMRegister, $src1$$Register); 5556 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5557 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5558 %} 5559 ins_pipe( pipe_slow ); 5560 %} 5561 #endif // _LP64 5562 5563 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5564 predicate(UseSSE >= 1 && UseAVX == 0); 5565 match(Set dst (AddReductionVF dst src2)); 5566 effect(TEMP dst, TEMP tmp); 5567 format %{ "addss $dst,$src2\n\t" 5568 "pshufd $tmp,$src2,0x01\n\t" 5569 "addss $dst,$tmp\t! add reduction2F" %} 5570 ins_encode %{ 5571 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5572 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5573 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5574 %} 5575 ins_pipe( pipe_slow ); 5576 %} 5577 5578 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5579 predicate(UseAVX > 0); 5580 match(Set dst (AddReductionVF dst src2)); 5581 effect(TEMP dst, TEMP tmp); 5582 format %{ "vaddss $dst,$dst,$src2\n\t" 5583 "pshufd $tmp,$src2,0x01\n\t" 5584 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5585 ins_encode %{ 5586 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5587 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5588 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5589 %} 5590 ins_pipe( pipe_slow ); 5591 %} 5592 5593 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5594 predicate(UseSSE >= 1 && UseAVX == 0); 5595 match(Set dst (AddReductionVF dst src2)); 5596 effect(TEMP dst, TEMP tmp); 5597 format %{ "addss $dst,$src2\n\t" 5598 "pshufd $tmp,$src2,0x01\n\t" 5599 "addss $dst,$tmp\n\t" 5600 "pshufd $tmp,$src2,0x02\n\t" 5601 "addss $dst,$tmp\n\t" 5602 "pshufd $tmp,$src2,0x03\n\t" 5603 "addss $dst,$tmp\t!
add reduction4F" %} 5604 ins_encode %{ 5605 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5606 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5607 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5608 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5609 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5610 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5611 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5612 %} 5613 ins_pipe( pipe_slow ); 5614 %} 5615 5616 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5617 predicate(UseAVX > 0); 5618 match(Set dst (AddReductionVF dst src2)); 5619 effect(TEMP tmp, TEMP dst); 5620 format %{ "vaddss $dst,$dst,$src2\n\t" 5621 "pshufd $tmp,$src2,0x01\n\t" 5622 "vaddss $dst,$dst,$tmp\n\t" 5623 "pshufd $tmp,$src2,0x02\n\t" 5624 "vaddss $dst,$dst,$tmp\n\t" 5625 "pshufd $tmp,$src2,0x03\n\t" 5626 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5627 ins_encode %{ 5628 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5629 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5630 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5631 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5632 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5633 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5634 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5635 %} 5636 ins_pipe( pipe_slow ); 5637 %} 5638
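// The floating point reductions above and below are deliberately serialized:
// each element is shuffled into place and added with a scalar addss/vaddss
// (addsd/vaddsd for doubles) in vector-index order rather than folded
// pairwise, because FP addition is not associative and the result must match
// what a sequential Java loop computes.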
5639 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5640 predicate(UseAVX > 0); 5641 match(Set dst (AddReductionVF dst src2)); 5642 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5643 format %{ "vaddss $dst,$dst,$src2\n\t" 5644 "pshufd $tmp,$src2,0x01\n\t" 5645 "vaddss $dst,$dst,$tmp\n\t" 5646 "pshufd $tmp,$src2,0x02\n\t" 5647 "vaddss $dst,$dst,$tmp\n\t" 5648 "pshufd $tmp,$src2,0x03\n\t" 5649 "vaddss $dst,$dst,$tmp\n\t" 5650 "vextractf128_high $tmp2,$src2\n\t" 5651 "vaddss $dst,$dst,$tmp2\n\t" 5652 "pshufd $tmp,$tmp2,0x01\n\t" 5653 "vaddss $dst,$dst,$tmp\n\t" 5654 "pshufd $tmp,$tmp2,0x02\n\t" 5655 "vaddss $dst,$dst,$tmp\n\t" 5656 "pshufd $tmp,$tmp2,0x03\n\t" 5657 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 5658 ins_encode %{ 5659 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5660 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5661 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5662 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5663 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5664 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5665 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5666 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5667 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5668 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5669 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5670 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5671 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5672 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5673 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5674 %} 5675 ins_pipe( pipe_slow ); 5676 %} 5677 5678 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5679 predicate(UseAVX > 2); 5680 match(Set dst (AddReductionVF dst src2)); 5681 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5682 format %{ "vaddss $dst,$dst,$src2\n\t" 5683 "pshufd $tmp,$src2,0x01\n\t" 5684 "vaddss $dst,$dst,$tmp\n\t" 5685 "pshufd $tmp,$src2,0x02\n\t" 5686 "vaddss $dst,$dst,$tmp\n\t" 5687 "pshufd $tmp,$src2,0x03\n\t" 5688 "vaddss $dst,$dst,$tmp\n\t" 5689 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5690 "vaddss $dst,$dst,$tmp2\n\t" 5691 "pshufd $tmp,$tmp2,0x01\n\t" 5692 "vaddss $dst,$dst,$tmp\n\t" 5693 "pshufd $tmp,$tmp2,0x02\n\t" 5694 "vaddss $dst,$dst,$tmp\n\t" 5695 "pshufd $tmp,$tmp2,0x03\n\t" 5696 "vaddss $dst,$dst,$tmp\n\t" 5697 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5698 "vaddss $dst,$dst,$tmp2\n\t" 5699 "pshufd $tmp,$tmp2,0x01\n\t" 5700 "vaddss $dst,$dst,$tmp\n\t" 5701 "pshufd $tmp,$tmp2,0x02\n\t" 5702 "vaddss $dst,$dst,$tmp\n\t" 5703 "pshufd $tmp,$tmp2,0x03\n\t" 5704 "vaddss $dst,$dst,$tmp\n\t" 5705 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5706 "vaddss $dst,$dst,$tmp2\n\t" 5707 "pshufd $tmp,$tmp2,0x01\n\t" 5708 "vaddss $dst,$dst,$tmp\n\t" 5709 "pshufd $tmp,$tmp2,0x02\n\t" 5710 "vaddss $dst,$dst,$tmp\n\t" 5711 "pshufd $tmp,$tmp2,0x03\n\t" 5712 "vaddss $dst,$dst,$tmp\t!
add reduction16F" %} 5713 ins_encode %{ 5714 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5715 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5716 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5717 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5718 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5719 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5720 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5721 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5722 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5723 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5724 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5725 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5726 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5727 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5728 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5729 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5730 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5731 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5732 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5733 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5734 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5735 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5736 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5737 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5738 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5739 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5740 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5741 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5742 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5743 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5744 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5745 %} 5746 ins_pipe( pipe_slow ); 5747 %} 5748 5749 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5750 predicate(UseSSE >= 1 && UseAVX == 0); 5751 match(Set dst (AddReductionVD dst src2)); 5752 effect(TEMP tmp, TEMP dst); 5753 format %{ "addsd $dst,$src2\n\t" 5754 "pshufd $tmp,$src2,0xE\n\t" 5755 "addsd $dst,$tmp\t! add reduction2D" %} 5756 ins_encode %{ 5757 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5758 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5759 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5760 %} 5761 ins_pipe( pipe_slow ); 5762 %} 5763 5764 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5765 predicate(UseAVX > 0); 5766 match(Set dst (AddReductionVD dst src2)); 5767 effect(TEMP tmp, TEMP dst); 5768 format %{ "vaddsd $dst,$dst,$src2\n\t" 5769 "pshufd $tmp,$src2,0xE\n\t" 5770 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5771 ins_encode %{ 5772 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5773 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5774 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5775 %} 5776 ins_pipe( pipe_slow ); 5777 %} 5778 5779 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5780 predicate(UseAVX > 0); 5781 match(Set dst (AddReductionVD dst src2)); 5782 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5783 format %{ "vaddsd $dst,$dst,$src2\n\t" 5784 "pshufd $tmp,$src2,0xE\n\t" 5785 "vaddsd $dst,$dst,$tmp\n\t" 5786 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5787 "vaddsd $dst,$dst,$tmp2\n\t" 5788 "pshufd $tmp,$tmp2,0xE\n\t" 5789 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5790 ins_encode %{ 5791 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5792 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5793 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5794 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5795 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5796 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5797 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5798 %} 5799 ins_pipe( pipe_slow ); 5800 %} 5801 5802 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5803 predicate(UseAVX > 2); 5804 match(Set dst (AddReductionVD dst src2)); 5805 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5806 format %{ "vaddsd $dst,$dst,$src2\n\t" 5807 "pshufd $tmp,$src2,0xE\n\t" 5808 "vaddsd $dst,$dst,$tmp\n\t" 5809 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5810 "vaddsd $dst,$dst,$tmp2\n\t" 5811 "pshufd $tmp,$tmp2,0xE\n\t" 5812 "vaddsd $dst,$dst,$tmp\n\t" 5813 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5814 "vaddsd $dst,$dst,$tmp2\n\t" 5815 "pshufd $tmp,$tmp2,0xE\n\t" 5816 "vaddsd $dst,$dst,$tmp\n\t" 5817 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5818 "vaddsd $dst,$dst,$tmp2\n\t" 5819 "pshufd $tmp,$tmp2,0xE\n\t" 5820 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5821 ins_encode %{ 5822 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5823 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5824 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5825 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5826 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5827 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5828 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5829 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5830 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5831 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5832 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5833 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5834 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5835 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5836 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5837 %} 5838 ins_pipe( pipe_slow ); 5839 %} 5840 5841 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5842 predicate(UseSSE > 3 && UseAVX == 0); 5843 match(Set dst (MulReductionVI src1 src2)); 5844 effect(TEMP tmp, TEMP tmp2); 5845 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5846 "pmulld $tmp2,$src2\n\t" 5847 "movd $tmp,$src1\n\t" 5848 "pmulld $tmp2,$tmp\n\t" 5849 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5850 ins_encode %{ 5851 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5852 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5853 __ movdl($tmp$$XMMRegister, $src1$$Register); 5854 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5855 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5856 %} 5857 ins_pipe( pipe_slow ); 5858 %} 5859 5860 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5861 predicate(UseAVX > 0); 5862 match(Set dst (MulReductionVI src1 src2)); 5863 effect(TEMP tmp, TEMP tmp2); 5864 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5865 "vpmulld $tmp,$src2,$tmp2\n\t" 5866 "movd $tmp2,$src1\n\t" 5867 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5868 "movd $dst,$tmp2\t! mul reduction2I" %} 5869 ins_encode %{ 5870 int vector_len = 0; 5871 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5872 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5873 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5874 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5875 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5876 %} 5877 ins_pipe( pipe_slow ); 5878 %} 5879 5880 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5881 predicate(UseSSE > 3 && UseAVX == 0); 5882 match(Set dst (MulReductionVI src1 src2)); 5883 effect(TEMP tmp, TEMP tmp2); 5884 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5885 "pmulld $tmp2,$src2\n\t" 5886 "pshufd $tmp,$tmp2,0x1\n\t" 5887 "pmulld $tmp2,$tmp\n\t" 5888 "movd $tmp,$src1\n\t" 5889 "pmulld $tmp2,$tmp\n\t" 5890 "movd $dst,$tmp2\t! mul reduction4I" %} 5891 ins_encode %{ 5892 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5893 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5894 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5895 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5896 __ movdl($tmp$$XMMRegister, $src1$$Register); 5897 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5898 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5899 %} 5900 ins_pipe( pipe_slow ); 5901 %} 5902 5903 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5904 predicate(UseAVX > 0); 5905 match(Set dst (MulReductionVI src1 src2)); 5906 effect(TEMP tmp, TEMP tmp2); 5907 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5908 "vpmulld $tmp,$src2,$tmp2\n\t" 5909 "pshufd $tmp2,$tmp,0x1\n\t" 5910 "vpmulld $tmp,$tmp,$tmp2\n\t" 5911 "movd $tmp2,$src1\n\t" 5912 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5913 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5914 ins_encode %{ 5915 int vector_len = 0; 5916 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5917 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5918 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5919 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5920 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5921 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5922 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5923 %} 5924 ins_pipe( pipe_slow ); 5925 %} 5926 5927 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5928 predicate(UseAVX > 0); 5929 match(Set dst (MulReductionVI src1 src2)); 5930 effect(TEMP tmp, TEMP tmp2); 5931 format %{ "vextracti128_high $tmp,$src2\n\t" 5932 "vpmulld $tmp,$tmp,$src2\n\t" 5933 "pshufd $tmp2,$tmp,0xE\n\t" 5934 "vpmulld $tmp,$tmp,$tmp2\n\t" 5935 "pshufd $tmp2,$tmp,0x1\n\t" 5936 "vpmulld $tmp,$tmp,$tmp2\n\t" 5937 "movd $tmp2,$src1\n\t" 5938 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5939 "movd $dst,$tmp2\t! mul reduction8I" %} 5940 ins_encode %{ 5941 int vector_len = 0; 5942 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5943 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5944 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5945 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5946 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5947 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5948 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5949 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5950 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5951 %} 5952 ins_pipe( pipe_slow ); 5953 %} 5954 5955 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5956 predicate(UseAVX > 2); 5957 match(Set dst (MulReductionVI src1 src2)); 5958 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5959 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5960 "vpmulld $tmp3,$tmp3,$src2\n\t" 5961 "vextracti128_high $tmp,$tmp3\n\t" 5962 "vpmulld $tmp,$tmp,$src2\n\t" 5963 "pshufd $tmp2,$tmp,0xE\n\t" 5964 "vpmulld $tmp,$tmp,$tmp2\n\t" 5965 "pshufd $tmp2,$tmp,0x1\n\t" 5966 "vpmulld $tmp,$tmp,$tmp2\n\t" 5967 "movd $tmp2,$src1\n\t" 5968 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5969 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5970 ins_encode %{ 5971 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5972 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5973 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5974 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5975 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5976 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5977 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5978 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5979 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5980 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5981 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5982 %} 5983 ins_pipe( pipe_slow ); 5984 %} 5985 5986 #ifdef _LP64 5987 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5988 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5989 match(Set dst (MulReductionVL src1 src2)); 5990 effect(TEMP tmp, TEMP tmp2); 5991 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5992 "vpmullq $tmp,$src2,$tmp2\n\t" 5993 "movdq $tmp2,$src1\n\t" 5994 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5995 "movdq $dst,$tmp2\t! mul reduction2L" %} 5996 ins_encode %{ 5997 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5998 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5999 __ movdq($tmp2$$XMMRegister, $src1$$Register); 6000 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6001 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6002 %} 6003 ins_pipe( pipe_slow ); 6004 %} 6005 6006 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 6007 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 6008 match(Set dst (MulReductionVL src1 src2)); 6009 effect(TEMP tmp, TEMP tmp2); 6010 format %{ "vextracti128_high $tmp,$src2\n\t" 6011 "vpmullq $tmp2,$tmp,$src2\n\t" 6012 "pshufd $tmp,$tmp2,0xE\n\t" 6013 "vpmullq $tmp2,$tmp2,$tmp\n\t" 6014 "movdq $tmp,$src1\n\t" 6015 "vpmullq $tmp2,$tmp2,$tmp\n\t" 6016 "movdq $dst,$tmp2\t! mul reduction4L" %} 6017 ins_encode %{ 6018 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6019 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 6020 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6021 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6022 __ movdq($tmp$$XMMRegister, $src1$$Register); 6023 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6024 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6025 %} 6026 ins_pipe( pipe_slow ); 6027 %} 6028 6029 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 6030 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 6031 match(Set dst (MulReductionVL src1 src2)); 6032 effect(TEMP tmp, TEMP tmp2); 6033 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6034 "vpmullq $tmp2,$tmp2,$src2\n\t" 6035 "vextracti128_high $tmp,$tmp2\n\t" 6036 "vpmullq $tmp2,$tmp2,$tmp\n\t" 6037 "pshufd $tmp,$tmp2,0xE\n\t" 6038 "vpmullq $tmp2,$tmp2,$tmp\n\t" 6039 "movdq $tmp,$src1\n\t" 6040 "vpmullq $tmp2,$tmp2,$tmp\n\t" 6041 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 6042 ins_encode %{ 6043 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6044 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6045 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6046 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6047 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6048 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6049 __ movdq($tmp$$XMMRegister, $src1$$Register); 6050 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6051 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6052 %} 6053 ins_pipe( pipe_slow ); 6054 %} 6055 #endif 6056 6057 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 6058 predicate(UseSSE >= 1 && UseAVX == 0); 6059 match(Set dst (MulReductionVF dst src2)); 6060 effect(TEMP dst, TEMP tmp); 6061 format %{ "mulss $dst,$src2\n\t" 6062 "pshufd $tmp,$src2,0x01\n\t" 6063 "mulss $dst,$tmp\t! mul reduction2F" %} 6064 ins_encode %{ 6065 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 6066 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6067 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 6068 %} 6069 ins_pipe( pipe_slow ); 6070 %} 6071 6072 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 6073 predicate(UseAVX > 0); 6074 match(Set dst (MulReductionVF dst src2)); 6075 effect(TEMP tmp, TEMP dst); 6076 format %{ "vmulss $dst,$dst,$src2\n\t" 6077 "pshufd $tmp,$src2,0x01\n\t" 6078 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 6079 ins_encode %{ 6080 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6081 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6082 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6083 %} 6084 ins_pipe( pipe_slow ); 6085 %} 6086 6087 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6088 predicate(UseSSE >= 1 && UseAVX == 0); 6089 match(Set dst (MulReductionVF dst src2)); 6090 effect(TEMP dst, TEMP tmp); 6091 format %{ "mulss $dst,$src2\n\t" 6092 "pshufd $tmp,$src2,0x01\n\t" 6093 "mulss $dst,$tmp\n\t" 6094 "pshufd $tmp,$src2,0x02\n\t" 6095 "mulss $dst,$tmp\n\t" 6096 "pshufd $tmp,$src2,0x03\n\t" 6097 "mulss $dst,$tmp\t! mul reduction4F" %} 6098 ins_encode %{ 6099 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 6100 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6101 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 6102 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6103 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 6104 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6105 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 6106 %} 6107 ins_pipe( pipe_slow ); 6108 %} 6109 6110 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6111 predicate(UseAVX > 0); 6112 match(Set dst (MulReductionVF dst src2)); 6113 effect(TEMP tmp, TEMP dst); 6114 format %{ "vmulss $dst,$dst,$src2\n\t" 6115 "pshufd $tmp,$src2,0x01\n\t" 6116 "vmulss $dst,$dst,$tmp\n\t" 6117 "pshufd $tmp,$src2,0x02\n\t" 6118 "vmulss $dst,$dst,$tmp\n\t" 6119 "pshufd $tmp,$src2,0x03\n\t" 6120 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 6121 ins_encode %{ 6122 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6123 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6124 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6125 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6126 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6127 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6128 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6129 %} 6130 ins_pipe( pipe_slow ); 6131 %} 6132 6133 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 6134 predicate(UseAVX > 0); 6135 match(Set dst (MulReductionVF dst src2)); 6136 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6137 format %{ "vmulss $dst,$dst,$src2\n\t" 6138 "pshufd $tmp,$src2,0x01\n\t" 6139 "vmulss $dst,$dst,$tmp\n\t" 6140 "pshufd $tmp,$src2,0x02\n\t" 6141 "vmulss $dst,$dst,$tmp\n\t" 6142 "pshufd $tmp,$src2,0x03\n\t" 6143 "vmulss $dst,$dst,$tmp\n\t" 6144 "vextractf128_high $tmp2,$src2\n\t" 6145 "vmulss $dst,$dst,$tmp2\n\t" 6146 "pshufd $tmp,$tmp2,0x01\n\t" 6147 "vmulss $dst,$dst,$tmp\n\t" 6148 "pshufd $tmp,$tmp2,0x02\n\t" 6149 "vmulss $dst,$dst,$tmp\n\t" 6150 "pshufd $tmp,$tmp2,0x03\n\t" 6151 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 6152 ins_encode %{ 6153 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6154 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6155 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6156 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6157 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6158 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6159 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6160 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6161 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6162 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6163 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6164 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6165 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6166 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6167 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6168 %} 6169 ins_pipe( pipe_slow ); 6170 %} 6171 6172 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 6173 predicate(UseAVX > 2); 6174 match(Set dst (MulReductionVF dst src2)); 6175 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6176 format %{ "vmulss $dst,$dst,$src2\n\t" 6177 "pshufd $tmp,$src2,0x01\n\t" 6178 "vmulss $dst,$dst,$tmp\n\t" 6179 "pshufd $tmp,$src2,0x02\n\t" 6180 "vmulss $dst,$dst,$tmp\n\t" 6181 "pshufd $tmp,$src2,0x03\n\t" 6182 "vmulss $dst,$dst,$tmp\n\t" 6183 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6184 "vmulss $dst,$dst,$tmp2\n\t" 6185 "pshufd $tmp,$tmp2,0x01\n\t" 6186 "vmulss $dst,$dst,$tmp\n\t" 6187 "pshufd $tmp,$tmp2,0x02\n\t" 6188 "vmulss $dst,$dst,$tmp\n\t" 6189 "pshufd $tmp,$tmp2,0x03\n\t" 6190 "vmulss $dst,$dst,$tmp\n\t" 6191 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6192 "vmulss $dst,$dst,$tmp2\n\t" 6193 "pshufd $tmp,$tmp2,0x01\n\t" 6194 "vmulss $dst,$dst,$tmp\n\t" 6195 "pshufd $tmp,$tmp2,0x02\n\t" 6196 "vmulss $dst,$dst,$tmp\n\t" 6197 "pshufd $tmp,$tmp2,0x03\n\t" 6198 "vmulss $dst,$dst,$tmp\n\t" 6199 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6200 "vmulss $dst,$dst,$tmp2\n\t" 6201 "pshufd $tmp,$tmp2,0x01\n\t" 6202 "vmulss $dst,$dst,$tmp\n\t" 6203 "pshufd 
$tmp,$tmp2,0x02\n\t" 6204 "vmulss $dst,$dst,$tmp\n\t" 6205 "pshufd $tmp,$tmp2,0x03\n\t" 6206 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 6207 ins_encode %{ 6208 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6209 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6210 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6211 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6212 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6213 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6214 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6215 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6216 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6217 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6218 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6219 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6220 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6221 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6222 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6223 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6224 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6225 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6226 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6227 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6228 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6229 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6230 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6231 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6232 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6233 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6234 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6235 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6236 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6237 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6238 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6239 %} 6240 ins_pipe( pipe_slow ); 6241 %} 6242 6243 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6244 predicate(UseSSE >= 1 && UseAVX == 0); 6245 match(Set dst (MulReductionVD dst src2)); 6246 effect(TEMP dst, TEMP tmp); 6247 format %{ "mulsd $dst,$src2\n\t" 6248 "pshufd $tmp,$src2,0xE\n\t" 6249 "mulsd $dst,$tmp\t! mul reduction2D" %} 6250 ins_encode %{ 6251 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 6252 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6253 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 6254 %} 6255 ins_pipe( pipe_slow ); 6256 %} 6257 6258 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6259 predicate(UseAVX > 0); 6260 match(Set dst (MulReductionVD dst src2)); 6261 effect(TEMP tmp, TEMP dst); 6262 format %{ "vmulsd $dst,$dst,$src2\n\t" 6263 "pshufd $tmp,$src2,0xE\n\t" 6264 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %} 6265 ins_encode %{ 6266 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6267 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6268 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6269 %} 6270 ins_pipe( pipe_slow ); 6271 %} 6272 6273 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 6274 predicate(UseAVX > 0); 6275 match(Set dst (MulReductionVD dst src2)); 6276 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6277 format %{ "vmulsd $dst,$dst,$src2\n\t" 6278 "pshufd $tmp,$src2,0xE\n\t" 6279 "vmulsd $dst,$dst,$tmp\n\t" 6280 "vextractf128_high $tmp2,$src2\n\t" 6281 "vmulsd $dst,$dst,$tmp2\n\t" 6282 "pshufd $tmp,$tmp2,0xE\n\t" 6283 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 6284 ins_encode %{ 6285 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6286 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6287 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6288 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6289 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6290 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6291 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6292 %} 6293 ins_pipe( pipe_slow ); 6294 %} 6295 6296 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 6297 predicate(UseAVX > 2); 6298 match(Set dst (MulReductionVD dst src2)); 6299 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6300 format %{ "vmulsd $dst,$dst,$src2\n\t" 6301 "pshufd $tmp,$src2,0xE\n\t" 6302 "vmulsd $dst,$dst,$tmp\n\t" 6303 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6304 "vmulsd $dst,$dst,$tmp2\n\t" 6305 "pshufd $tmp,$src2,0xE\n\t" 6306 "vmulsd $dst,$dst,$tmp\n\t" 6307 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6308 "vmulsd $dst,$dst,$tmp2\n\t" 6309 "pshufd $tmp,$tmp2,0xE\n\t" 6310 "vmulsd $dst,$dst,$tmp\n\t" 6311 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6312 "vmulsd $dst,$dst,$tmp2\n\t" 6313 "pshufd $tmp,$tmp2,0xE\n\t" 6314 "vmulsd $dst,$dst,$tmp\t! 
mul reduction8D" %} 6315 ins_encode %{ 6316 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6317 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6318 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6319 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6320 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6321 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6322 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6323 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6324 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6325 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6326 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6327 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6328 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6329 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6330 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6331 %} 6332 ins_pipe( pipe_slow ); 6333 %} 6334 6335 instruct rsand8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 6336 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6337 match(Set dst (AndReductionV src1 src2)); 6338 effect(TEMP tmp, TEMP tmp2, TEMP dst); 6339 format %{ 6340 "pshufd $tmp,$src2,0x1\n\t" 6341 "pand $tmp,$src2\n\t" 6342 "movzbl $dst,$src1\n\t" 6343 "pextrb $tmp2,$tmp, 0x0\n\t" 6344 "andb $dst,$tmp2\n\t" 6345 "pextrb $tmp2,$tmp, 0x1\n\t" 6346 "andb $dst,$tmp2\n\t" 6347 "pextrb $tmp2,$tmp, 0x2\n\t" 6348 "andb $dst,$tmp2\n\t" 6349 "pextrb $tmp2,$tmp, 0x3\n\t" 6350 "andb $dst,$tmp2\n\t" 6351 "movsbl $dst,$dst\t! and reduction8B" %} 6352 ins_encode %{ 6353 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 6354 __ pand($tmp$$XMMRegister, $src2$$XMMRegister); 6355 __ movzbl($dst$$Register, $src1$$Register); 6356 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); 6357 __ andb($dst$$Register, $tmp2$$Register); 6358 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); 6359 __ andb($dst$$Register, $tmp2$$Register); 6360 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); 6361 __ andb($dst$$Register, $tmp2$$Register); 6362 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); 6363 __ andb($dst$$Register, $tmp2$$Register); 6364 __ movsbl($dst$$Register, $dst$$Register); 6365 %} 6366 ins_pipe( pipe_slow ); 6367 %} 6368 6369 instruct rsand16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6370 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6371 match(Set dst (AndReductionV src1 src2)); 6372 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6373 format %{ "pshufd $tmp,$src2,0xE\n\t" 6374 "pand $tmp,$src2\n\t" 6375 "pshufd $tmp2,$tmp,0x1\n\t" 6376 "pand $tmp,$tmp,$tmp2\n\t" 6377 "movzbl $dst,$src1\n\t" 6378 "pextrb $tmp3,$tmp, 0x0\n\t" 6379 "andb $dst,$tmp3\n\t" 6380 "pextrb $tmp3,$tmp, 0x1\n\t" 6381 "andb $dst,$tmp3\n\t" 6382 "pextrb $tmp3,$tmp, 0x2\n\t" 6383 "andb $dst,$tmp3\n\t" 6384 "pextrb $tmp3,$tmp, 0x3\n\t" 6385 "andb $dst,$tmp3\n\t" 6386 "movsbl $dst,$dst\t! 
and reduction16B" %} 6387 ins_encode %{ 6388 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6389 __ pand($tmp$$XMMRegister, $src2$$XMMRegister); 6390 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6391 __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); 6392 __ movzbl($dst$$Register, $src1$$Register); 6393 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6394 __ andb($dst$$Register, $tmp3$$Register); 6395 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 6396 __ andb($dst$$Register, $tmp3$$Register); 6397 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 6398 __ andb($dst$$Register, $tmp3$$Register); 6399 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 6400 __ andb($dst$$Register, $tmp3$$Register); 6401 __ movsbl($dst$$Register, $dst$$Register); 6402 %} 6403 ins_pipe( pipe_slow ); 6404 %} 6405 6406 instruct rvand32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6407 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6408 match(Set dst (AndReductionV src1 src2)); 6409 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6410 format %{ "vextracti128_high $tmp,$src2\n\t" 6411 "vpand $tmp,$tmp,$src2\n\t" 6412 "pshufd $tmp2,$tmp,0xE\n\t" 6413 "vpand $tmp,$tmp,$tmp2\n\t" 6414 "pshufd $tmp2,$tmp,0x1\n\t" 6415 "vpand $tmp,$tmp,$tmp2\n\t" 6416 "movzbl $dst,$src1\n\t" 6417 "pextrb $tmp3,$tmp, 0x0\n\t" 6418 "andb $dst,$tmp3\n\t" 6419 "pextrb $tmp3,$tmp, 0x1\n\t" 6420 "andb $dst,$tmp3\n\t" 6421 "pextrb $tmp3,$tmp, 0x2\n\t" 6422 "andb $dst,$tmp3\n\t" 6423 "pextrb $tmp3,$tmp, 0x3\n\t" 6424 "andb $dst,$tmp3\n\t" 6425 "movsbl $dst,$dst\t! and reduction32B" %} 6426 ins_encode %{ 6427 int vector_len = 0; 6428 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6429 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 6430 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 6431 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6432 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 6433 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6434 __ movzbl($dst$$Register, $src1$$Register); 6435 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6436 __ andb($dst$$Register, $tmp3$$Register); 6437 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 6438 __ andb($dst$$Register, $tmp3$$Register); 6439 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 6440 __ andb($dst$$Register, $tmp3$$Register); 6441 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 6442 __ andb($dst$$Register, $tmp3$$Register); 6443 __ movsbl($dst$$Register, $dst$$Register); 6444 %} 6445 ins_pipe( pipe_slow ); 6446 %} 6447 6448 instruct rvand64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6449 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6450 match(Set dst (AndReductionV src1 src2)); 6451 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6452 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6453 "vpand $tmp2,$tmp2,$src2\n\t" 6454 "vextracti128_high $tmp,$tmp2\n\t" 6455 "vpand $tmp,$tmp,$tmp2\n\t" 6456 "pshufd $tmp2,$tmp,0xE\n\t" 6457 "vpand $tmp,$tmp,$tmp2\n\t" 6458 "pshufd $tmp2,$tmp,0x1\n\t" 6459 "vpand $tmp,$tmp,$tmp2\n\t" 6460 "movzbl $dst,$src1\n\t" 6461 "movdl $tmp3,$tmp\n\t" 6462 "andb $dst,$tmp3\n\t" 6463 "shrl $tmp3,0x8\n\t" 6464 "andb $dst,$tmp3\n\t" 6465 "shrl $tmp3,0x8\n\t" 6466 "andb $dst,$tmp3\n\t" 6467 "shrl 
$tmp3,0x8\n\t" 6468 "andb $dst,$tmp3\n\t" 6469 "movsbl $dst,$dst\t! and reduction64B" %} 6470 ins_encode %{ 6471 int vector_len = 0; 6472 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6473 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6474 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6475 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6476 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 6477 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6478 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6479 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6480 __ movzbl($dst$$Register, $src1$$Register); 6481 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 6482 __ andb($dst$$Register, $tmp3$$Register); 6483 __ shrl($tmp3$$Register, 8); 6484 __ andb($dst$$Register, $tmp3$$Register); 6485 __ shrl($tmp3$$Register, 8); 6486 __ andb($dst$$Register, $tmp3$$Register); 6487 __ shrl($tmp3$$Register, 8); 6488 __ andb($dst$$Register, $tmp3$$Register); 6489 __ movsbl($dst$$Register, $dst$$Register); 6490 %} 6491 ins_pipe( pipe_slow ); 6492 %} 6493 6494 instruct rsand4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 6495 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 6496 match(Set dst (AndReductionV src1 src2)); 6497 effect(TEMP tmp, TEMP tmp2, TEMP dst); 6498 format %{ 6499 "pshufd $tmp,$src2,0x1\n\t" 6500 "pand $tmp,$src2\n\t" 6501 "movzwl $dst,$src1\n\t" 6502 "pextrw $tmp2,$tmp, 0x0\n\t" 6503 "andw $dst,$tmp2\n\t" 6504 "pextrw $tmp2,$tmp, 0x1\n\t" 6505 "andw $dst,$tmp2\n\t" 6506 "movswl $dst,$dst\t! and reduction4S" %} 6507 ins_encode %{ 6508 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 6509 __ pand($tmp$$XMMRegister, $src2$$XMMRegister); 6510 __ movzwl($dst$$Register, $src1$$Register); 6511 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 6512 __ andw($dst$$Register, $tmp2$$Register); 6513 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 6514 __ andw($dst$$Register, $tmp2$$Register); 6515 __ movswl($dst$$Register, $dst$$Register); 6516 %} 6517 ins_pipe( pipe_slow ); 6518 %} 6519 6520 instruct rsand8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6521 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 6522 match(Set dst (AndReductionV src1 src2)); 6523 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6524 format %{ "pshufd $tmp,$src2,0xE\n\t" 6525 "pand $tmp,$src2\n\t" 6526 "pshufd $tmp2,$tmp,0x1\n\t" 6527 "pand $tmp,$tmp,$tmp2\n\t" 6528 "movzwl $dst,$src1\n\t" 6529 "pextrw $tmp3,$tmp, 0x0\n\t" 6530 "andw $dst,$tmp3\n\t" 6531 "pextrw $tmp3,$tmp, 0x1\n\t" 6532 "andw $dst,$tmp3\n\t" 6533 "movswl $dst,$dst\t! 
and reduction8S" %} 6534 ins_encode %{ 6535 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6536 __ pand($tmp$$XMMRegister, $src2$$XMMRegister); 6537 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6538 __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); 6539 __ movzwl($dst$$Register, $src1$$Register); 6540 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6541 __ andw($dst$$Register, $tmp3$$Register); 6542 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); 6543 __ andw($dst$$Register, $tmp3$$Register); 6544 __ movswl($dst$$Register, $dst$$Register); 6545 %} 6546 ins_pipe( pipe_slow ); 6547 %} 6548 6549 instruct rvand16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6550 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 6551 match(Set dst (AndReductionV src1 src2)); 6552 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6553 format %{ "vextracti128_high $tmp,$src2\n\t" 6554 "vpand $tmp,$tmp,$src2\n\t" 6555 "pshufd $tmp2,$tmp,0xE\n\t" 6556 "vpand $tmp,$tmp,$tmp2\n\t" 6557 "pshufd $tmp2,$tmp,0x1\n\t" 6558 "vpand $tmp,$tmp,$tmp2\n\t" 6559 "movzwl $dst,$src1\n\t" 6560 "pextrw $tmp3,$tmp, 0x0\n\t" 6561 "andw $dst,$tmp3\n\t" 6562 "pextrw $tmp3,$tmp, 0x1\n\t" 6563 "andw $dst,$tmp3\n\t" 6564 "movswl $dst,$dst\t! and reduction16S" %} 6565 ins_encode %{ 6566 int vector_len = 0; 6567 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6568 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 6569 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 6570 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6571 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 6572 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6573 __ movzwl($dst$$Register, $src1$$Register); 6574 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6575 __ andw($dst$$Register, $tmp3$$Register); 6576 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); 6577 __ andw($dst$$Register, $tmp3$$Register); 6578 __ movswl($dst$$Register, $dst$$Register); 6579 %} 6580 ins_pipe( pipe_slow ); 6581 %} 6582 6583 instruct rvand32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6584 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 6585 match(Set dst (AndReductionV src1 src2)); 6586 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6587 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6588 "vpand $tmp2,$tmp2,$src2\n\t" 6589 "vextracti128_high $tmp,$tmp2\n\t" 6590 "vpand $tmp,$tmp,$tmp2\n\t" 6591 "pshufd $tmp2,$tmp,0xE\n\t" 6592 "vpand $tmp,$tmp,$tmp2\n\t" 6593 "pshufd $tmp2,$tmp,0x1\n\t" 6594 "vpand $tmp,$tmp,$tmp2\n\t" 6595 "movzwl $dst,$src1\n\t" 6596 "movdl $tmp3,$tmp\n\t" 6597 "andw $dst,$tmp3\n\t" 6598 "shrl $tmp3,0x16\n\t" 6599 "andw $dst,$tmp3\n\t" 6600 "movswl $dst,$dst\t! 
and reduction32S" %} 6601 ins_encode %{ 6602 int vector_len = 0; 6603 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6604 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6605 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6606 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6607 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 6608 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6609 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6610 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6611 __ movzwl($dst$$Register, $src1$$Register); 6612 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 6613 __ andw($dst$$Register, $tmp3$$Register); 6614 __ shrl($tmp3$$Register, 16); 6615 __ andw($dst$$Register, $tmp3$$Register); 6616 __ movswl($dst$$Register, $dst$$Register); 6617 %} 6618 ins_pipe( pipe_slow ); 6619 %} 6620 6621 instruct rsand2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 6622 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6623 match(Set dst (AndReductionV src1 src2)); 6624 effect(TEMP tmp, TEMP tmp2); 6625 format %{ "pshufd $tmp2,$src2,0x1\n\t" 6626 "pand $tmp2,$src2\n\t" 6627 "movd $tmp,$src1\n\t" 6628 "pand $tmp2,$tmp\n\t" 6629 "movd $dst,$tmp2\t! and reduction2I" %} 6630 ins_encode %{ 6631 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6632 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 6633 __ movdl($tmp$$XMMRegister, $src1$$Register); 6634 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 6635 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6636 %} 6637 ins_pipe( pipe_slow ); 6638 %} 6639 6640 instruct rsand4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 6641 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6642 match(Set dst (AndReductionV src1 src2)); 6643 effect(TEMP tmp, TEMP tmp2); 6644 format %{ "pshufd $tmp2,$src2,0xE\n\t" 6645 "pand $tmp2,$src2\n\t" 6646 "pshufd $tmp,$tmp2,0x1\n\t" 6647 "pand $tmp2,$tmp\n\t" 6648 "movd $tmp,$src1\n\t" 6649 "pand $tmp2,$tmp\n\t" 6650 "movd $dst,$tmp2\t! and reduction4I" %} 6651 ins_encode %{ 6652 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 6653 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 6654 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 6655 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 6656 __ movdl($tmp$$XMMRegister, $src1$$Register); 6657 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 6658 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6659 %} 6660 ins_pipe( pipe_slow ); 6661 %} 6662 6663 instruct rvand8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 6664 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6665 match(Set dst (AndReductionV src1 src2)); 6666 effect(TEMP tmp, TEMP tmp2); 6667 format %{ "vextracti128_high $tmp,$src2\n\t" 6668 "vpand $tmp,$tmp,$src2\n\t" 6669 "vpshufd $tmp2,$tmp,0xE\n\t" 6670 "vpand $tmp,$tmp,$tmp2\n\t" 6671 "vpshufd $tmp2,$tmp,0x1\n\t" 6672 "vpand $tmp,$tmp,$tmp2\n\t" 6673 "movd $tmp2,$src1\n\t" 6674 "vpand $tmp2,$tmp,$tmp2\n\t" 6675 "movd $dst,$tmp2\t! 
and reduction8I" %} 6676 ins_encode %{ 6677 int vector_len = 0; 6678 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6679 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 6680 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 6681 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6682 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 6683 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6684 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6685 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6686 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6687 %} 6688 ins_pipe( pipe_slow ); 6689 %} 6690 6691 instruct rvand16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 6692 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6693 match(Set dst (AndReductionV src1 src2)); 6694 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 6695 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 6696 "vpand $tmp3,$tmp3,$src2\n\t" 6697 "vextracti128_high $tmp,$tmp3\n\t" 6698 "vpand $tmp,$tmp,$src2\n\t" 6699 "vpshufd $tmp2,$tmp,0xE\n\t" 6700 "vpand $tmp,$tmp,$tmp2\n\t" 6701 "vpshufd $tmp2,$tmp,0x1\n\t" 6702 "vpand $tmp,$tmp,$tmp2\n\t" 6703 "movd $tmp2,$src1\n\t" 6704 "vpand $tmp2,$tmp,$tmp2\n\t" 6705 "movd $dst,$tmp2\t! and reduction16I" %} 6706 ins_encode %{ 6707 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 6708 __ vpand($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 6709 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 6710 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 6711 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, 0); 6712 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6713 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, 0); 6714 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6715 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6716 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6717 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6718 %} 6719 ins_pipe( pipe_slow ); 6720 %} 6721 6722 #ifdef _LP64 6723 instruct rsand2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 6724 predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 6725 match(Set dst (AndReductionV src1 src2)); 6726 effect(TEMP tmp, TEMP tmp2); 6727 format %{ "pshufd $tmp2,$src2,0xE\n\t" 6728 "pand $tmp2,$src2\n\t" 6729 "movdq $tmp,$src1\n\t" 6730 "pand $tmp2,$tmp\n\t" 6731 "movq $dst,$tmp2\t! 
and reduction2L" %} 6732 ins_encode %{ 6733 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 6734 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 6735 __ movdq($tmp$$XMMRegister, $src1$$Register); 6736 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 6737 __ movq($dst$$Register, $tmp2$$XMMRegister); 6738 %} 6739 ins_pipe( pipe_slow ); 6740 %} 6741 6742 instruct rvand4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 6743 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 6744 match(Set dst (AndReductionV src1 src2)); 6745 effect(TEMP tmp, TEMP tmp2); 6746 format %{ "vextracti128_high $tmp,$src2\n\t" 6747 "vpand $tmp2,$tmp,$src2\n\t" 6748 "vpshufd $tmp,$tmp2,0xE\n\t" 6749 "vpand $tmp2,$tmp2,$tmp\n\t" 6750 "movq $tmp,$src1\n\t" 6751 "vpand $tmp2,$tmp2,$tmp\n\t" 6752 "movq $dst,$tmp2\t! and reduction4L" %} 6753 ins_encode %{ 6754 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6755 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 6756 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); 6757 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6758 __ movq($tmp$$XMMRegister, $src1$$Register); 6759 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6760 __ movq($dst$$Register, $tmp2$$XMMRegister); 6761 %} 6762 ins_pipe( pipe_slow ); 6763 %} 6764 6765 instruct rvand8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 6766 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 6767 match(Set dst (AndReductionV src1 src2)); 6768 effect(TEMP tmp, TEMP tmp2); 6769 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6770 "vpandq $tmp2,$tmp2,$src2\n\t" 6771 "vextracti128_high $tmp,$tmp2\n\t" 6772 "vpandq $tmp2,$tmp2,$tmp\n\t" 6773 "vpshufd $tmp,$tmp2,0xE\n\t" 6774 "vpandq $tmp2,$tmp2,$tmp\n\t" 6775 "movdq $tmp,$src1\n\t" 6776 "vpandq $tmp2,$tmp2,$tmp\n\t" 6777 "movdq $dst,$tmp2\t! and reduction8L" %} 6778 ins_encode %{ 6779 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6780 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6781 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6782 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6783 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); 6784 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6785 __ movdq($tmp$$XMMRegister, $src1$$Register); 6786 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6787 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6788 %} 6789 ins_pipe( pipe_slow ); 6790 %} 6791 #endif 6792 6793 instruct rsor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 6794 predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT); 6795 match(Set dst (OrReductionV src1 src2)); 6796 effect(TEMP tmp, TEMP tmp2); 6797 format %{ "pshufd $tmp2,$src2,0x1\n\t" 6798 "por $tmp2,$src2\n\t" 6799 "movd $tmp,$src1\n\t" 6800 "por $tmp2,$tmp\n\t" 6801 "movd $dst,$tmp2\t! 
or reduction2I" %} 6802 ins_encode %{ 6803 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6804 __ por($tmp2$$XMMRegister, $src2$$XMMRegister); 6805 __ movdl($tmp$$XMMRegister, $src1$$Register); 6806 __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); 6807 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6808 %} 6809 ins_pipe( pipe_slow ); 6810 %} 6811 6812 instruct rsor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 6813 predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT); 6814 match(Set dst (OrReductionV src1 src2)); 6815 effect(TEMP tmp, TEMP tmp2); 6816 format %{ "pshufd $tmp2,$src2,0xE\n\t" 6817 "por $tmp2,$src2\n\t" 6818 "pshufd $tmp,$tmp2,0x1\n\t" 6819 "por $tmp2,$tmp\n\t" 6820 "movd $tmp,$src1\n\t" 6821 "por $tmp2,$tmp\n\t" 6822 "movd $dst,$tmp2\t! or reduction4I" %} 6823 ins_encode %{ 6824 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 6825 __ por($tmp2$$XMMRegister, $src2$$XMMRegister); 6826 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 6827 __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); 6828 __ movdl($tmp$$XMMRegister, $src1$$Register); 6829 __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); 6830 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6831 %} 6832 ins_pipe( pipe_slow ); 6833 %} 6834 6835 instruct rvor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 6836 predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_INT); 6837 match(Set dst (OrReductionV src1 src2)); 6838 effect(TEMP tmp, TEMP tmp2); 6839 format %{ "vextracti128_high $tmp,$src2\n\t" 6840 "vpor $tmp,$tmp,$src2\n\t" 6841 "vpshufd $tmp2,$tmp,0xE\t" 6842 "vpor $tmp,$tmp,$tmp2\n\t" 6843 "vpshufd $tmp2,$tmp,0x1\t" 6844 "vpor $tmp,$tmp,$tmp2\n\t" 6845 "movd $tmp2,$src1\n\t" 6846 "vpor $tmp2,$tmp,$tmp2\n\t" 6847 "movd $dst,$tmp2\t! or reduction8I" %} 6848 ins_encode %{ 6849 int vector_len = 0; 6850 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6851 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 6852 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 6853 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6854 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 6855 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6856 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6857 __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6858 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6859 %} 6860 ins_pipe( pipe_slow ); 6861 %} 6862 6863 instruct rvor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 6864 predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_INT); 6865 match(Set dst (OrReductionV src1 src2)); 6866 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 6867 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 6868 "vpor $tmp3,$tmp3,$src2\n\t" 6869 "vextracti128_high $tmp,$tmp3\n\t" 6870 "vpor $tmp,$tmp,$src2\n\t" 6871 "vpshufd $tmp2,$tmp,0xE\t" 6872 "vpor $tmp,$tmp,$tmp2\n\t" 6873 "vpshufd $tmp2,$tmp,0x1\t" 6874 "vpor $tmp,$tmp,$tmp2\n\t" 6875 "movd $tmp2,$src1\n\t" 6876 "vpor $tmp2,$tmp,$tmp2\n\t" 6877 "movd $dst,$tmp2\t! 
or reduction16I" %} 6878 ins_encode %{ 6879 int vector_len = 0; 6880 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 6881 __ vpor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 6882 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 6883 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); 6884 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 6885 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6886 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 6887 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6888 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6889 __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6890 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6891 %} 6892 ins_pipe( pipe_slow ); 6893 %} 6894 6895 instruct rsor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 6896 predicate(UseSSE >= 2 && n->in(1)->bottom_type()->basic_type() == T_LONG); 6897 match(Set dst (OrReductionV src1 src2)); 6898 effect(TEMP tmp, TEMP tmp2); 6899 format %{ "pshufd $tmp2,$src2,0xE\n\t" 6900 "por $tmp2,$src2\n\t" 6901 "movdq $tmp,$src1\n\t" 6902 "por $tmp2,$tmp\n\t" 6903 "movq $dst,$tmp2\t! or reduction2L" %} 6904 ins_encode %{ 6905 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 6906 __ por($tmp2$$XMMRegister, $src2$$XMMRegister); 6907 __ movdq($tmp$$XMMRegister, $src1$$Register); 6908 __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); 6909 __ movq($dst$$Register, $tmp2$$XMMRegister); 6910 %} 6911 ins_pipe( pipe_slow ); 6912 %} 6913 6914 instruct rvor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 6915 predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_LONG); 6916 match(Set dst (OrReductionV src1 src2)); 6917 effect(TEMP tmp, TEMP tmp2); 6918 format %{ "vextracti128_high $tmp,$src2\n\t" 6919 "vpor $tmp2,$tmp,$src2\n\t" 6920 "vpshufd $tmp,$tmp2,0xE\t" 6921 "vpor $tmp2,$tmp2,$tmp\n\t" 6922 "movq $tmp,$src1\n\t" 6923 "vpor $tmp2,$tmp2,$tmp\n\t" 6924 "movq $dst,$tmp2\t! or reduction4L" %} 6925 ins_encode %{ 6926 int vector_len = 0; 6927 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6928 __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 6929 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); 6930 __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 6931 __ movq($tmp$$XMMRegister, $src1$$Register); 6932 __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 6933 __ movq($dst$$Register, $tmp2$$XMMRegister); 6934 %} 6935 ins_pipe( pipe_slow ); 6936 %} 6937 6938 #ifdef _LP64 6939 instruct rvor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 6940 predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_LONG); 6941 match(Set dst (OrReductionV src1 src2)); 6942 effect(TEMP tmp, TEMP tmp2); 6943 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6944 "vporq $tmp2,$tmp2,$src2\n\t" 6945 "vextracti128_high $tmp,$tmp2\n\t" 6946 "vporq $tmp2,$tmp2,$tmp\n\t" 6947 "vpshufd $tmp,$tmp2,0xE\t" 6948 "vporq $tmp2,$tmp2,$tmp\n\t" 6949 "movdq $tmp,$src1\n\t" 6950 "vporq $tmp2,$tmp2,$tmp\n\t" 6951 "movdq $dst,$tmp2\t! 
or reduction8L" %} 6952 ins_encode %{ 6953 int vector_len = 0; 6954 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6955 __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6956 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6957 __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 6958 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); 6959 __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 6960 __ movdq($tmp$$XMMRegister, $src1$$Register); 6961 __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 6962 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6963 %} 6964 ins_pipe( pipe_slow ); 6965 %} 6966 #endif 6967 6968 instruct rsxor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 6969 predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT); 6970 match(Set dst (XorReductionV src1 src2)); 6971 effect(TEMP tmp, TEMP tmp2); 6972 format %{ "pshufd $tmp2,$src2,0x1\n\t" 6973 "pxor $tmp2,$src2\n\t" 6974 "movd $tmp,$src1\n\t" 6975 "pxor $tmp2,$tmp\n\t" 6976 "movd $dst,$tmp2\t! xor reduction2I" %} 6977 ins_encode %{ 6978 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6979 __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); 6980 __ movdl($tmp$$XMMRegister, $src1$$Register); 6981 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 6982 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6983 %} 6984 ins_pipe( pipe_slow ); 6985 %} 6986 6987 instruct rsxor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 6988 predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT); 6989 match(Set dst (XorReductionV src1 src2)); 6990 effect(TEMP tmp, TEMP tmp2); 6991 format %{ "pshufd $tmp2,$src2,0xE\n\t" 6992 "pxor $tmp2,$src2\n\t" 6993 "pshufd $tmp,$tmp2,0x1\n\t" 6994 "pxor $tmp2,$tmp\n\t" 6995 "movd $tmp,$src1\n\t" 6996 "pxor $tmp2,$tmp\n\t" 6997 "movd $dst,$tmp2\t! xor reduction4I" %} 6998 ins_encode %{ 6999 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 7000 __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); 7001 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 7002 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 7003 __ movdl($tmp$$XMMRegister, $src1$$Register); 7004 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 7005 __ movdl($dst$$Register, $tmp2$$XMMRegister); 7006 %} 7007 ins_pipe( pipe_slow ); 7008 %} 7009 7010 instruct rvxor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 7011 predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_INT); 7012 match(Set dst (XorReductionV src1 src2)); 7013 effect(TEMP tmp, TEMP tmp2); 7014 format %{ "vextracti128_high $tmp,$src2\n\t" 7015 "vpxor $tmp,$tmp,$src2\n\t" 7016 "vpshufd $tmp2,$tmp,0xE\t" 7017 "vpxor $tmp,$tmp,$tmp2\n\t" 7018 "vpshufd $tmp2,$tmp,0x1\t" 7019 "vpxor $tmp,$tmp,$tmp2\n\t" 7020 "movd $tmp2,$src1\n\t" 7021 "vpxor $tmp2,$tmp,$tmp2\n\t" 7022 "movd $dst,$tmp2\t! 
xor reduction8I" %} 7023 ins_encode %{ 7024 int vector_len = 0; 7025 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 7026 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 7027 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 7028 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7029 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 7030 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7031 __ movdl($tmp2$$XMMRegister, $src1$$Register); 7032 __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7033 __ movdl($dst$$Register, $tmp2$$XMMRegister); 7034 %} 7035 ins_pipe( pipe_slow ); 7036 %} 7037 7038 instruct rvxor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 7039 predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_INT); 7040 match(Set dst (XorReductionV src1 src2)); 7041 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 7042 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 7043 "vpxor $tmp3,$tmp3,$src2\n\t" 7044 "vextracti128_high $tmp,$tmp3\n\t" 7045 "vpxor $tmp,$tmp,$src2\n\t" 7046 "vpshufd $tmp2,$tmp,0xE\t" 7047 "vpxor $tmp,$tmp,$tmp2\n\t" 7048 "vpshufd $tmp2,$tmp,0x1\t" 7049 "vpxor $tmp,$tmp,$tmp2\n\t" 7050 "movd $tmp2,$src1\n\t" 7051 "vpxor $tmp2,$tmp,$tmp2\n\t" 7052 "movd $dst,$tmp2\t! xor reduction16I" %} 7053 ins_encode %{ 7054 int vector_len = 0; 7055 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 7056 __ vpxor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 7057 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 7058 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); 7059 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 7060 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7061 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 7062 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7063 __ movdl($tmp2$$XMMRegister, $src1$$Register); 7064 __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 7065 __ movdl($dst$$Register, $tmp2$$XMMRegister); 7066 %} 7067 ins_pipe( pipe_slow ); 7068 %} 7069 7070 instruct rsxor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 7071 predicate(UseSSE >= 2 && n->in(1)->bottom_type()->basic_type() == T_LONG); 7072 match(Set dst (XorReductionV src1 src2)); 7073 effect(TEMP tmp, TEMP tmp2); 7074 format %{ "pshufd $tmp2,$src2,0xE\n\t" 7075 "pxor $tmp2,$src2\n\t" 7076 "movdq $tmp,$src1\n\t" 7077 "pxor $tmp2,$tmp\n\t" 7078 "movq $dst,$tmp2\t! 
xor reduction2L" %} 7079 ins_encode %{ 7080 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 7081 __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); 7082 __ movdq($tmp$$XMMRegister, $src1$$Register); 7083 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 7084 __ movq($dst$$Register, $tmp2$$XMMRegister); 7085 %} 7086 ins_pipe( pipe_slow ); 7087 %} 7088 7089 instruct rvxor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 7090 predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_LONG); 7091 match(Set dst (XorReductionV src1 src2)); 7092 effect(TEMP tmp, TEMP tmp2); 7093 format %{ "vextracti128_high $tmp,$src2\n\t" 7094 "vpxor $tmp2,$tmp,$src2\n\t" 7095 "vpshufd $tmp,$tmp2,0xE\t" 7096 "vpxor $tmp2,$tmp2,$tmp\n\t" 7097 "movq $tmp,$src1\n\t" 7098 "vpxor $tmp2,$tmp2,$tmp\n\t" 7099 "movq $dst,$tmp2\t! xor reduction4L" %} 7100 ins_encode %{ 7101 int vector_len = 0; 7102 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 7103 __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 7104 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); 7105 __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 7106 __ movq($tmp$$XMMRegister, $src1$$Register); 7107 __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 7108 __ movq($dst$$Register, $tmp2$$XMMRegister); 7109 %} 7110 ins_pipe( pipe_slow ); 7111 %} 7112 7113 #ifdef _LP64 7114 instruct rvxor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 7115 predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_LONG); 7116 match(Set dst (XorReductionV src1 src2)); 7117 effect(TEMP tmp, TEMP tmp2); 7118 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 7119 "vpxorq $tmp2,$tmp2,$src2\n\t" 7120 "vextracti128_high $tmp,$tmp2\n\t" 7121 "vpxorq $tmp2,$tmp2,$tmp\n\t" 7122 "vpshufd $tmp,$tmp2,0xE\t" 7123 "vpxorq $tmp2,$tmp2,$tmp\n\t" 7124 "movdq $tmp,$src1\n\t" 7125 "vpxorq $tmp2,$tmp2,$tmp\n\t" 7126 "movdq $dst,$tmp2\t! xor reduction8L" %} 7127 ins_encode %{ 7128 int vector_len = 0; 7129 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 7130 __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 7131 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 7132 __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 7133 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); 7134 __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 7135 __ movdq($tmp$$XMMRegister, $src1$$Register); 7136 __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 7137 __ movdq($dst$$Register, $tmp2$$XMMRegister); 7138 %} 7139 ins_pipe( pipe_slow ); 7140 %} 7141 #endif 7142 7143 // ====================VECTOR ARITHMETIC======================================= 7144 7145 // --------------------------------- ADD -------------------------------------- 7146 7147 // Bytes vector add 7148 instruct vadd4B(vecS dst, vecS src) %{ 7149 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 7150 match(Set dst (AddVB dst src)); 7151 format %{ "paddb $dst,$src\t! 
// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
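
// The *_evex_special forms cover AVX-512 targets without BW, where the
// packed byte/short instructions have no EVEX encoding: they tie $dst
// to the first input of the match rule and declare the spare vector
// operand as TEMP, constraining the register allocator accordingly.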
instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
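
// 512-bit byte/short instructions exist only with AVX512BW, so the
// 64B rules below (and the 32S rules later) have no avx512nobw
// *_special counterparts.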
instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
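// vpaddd/vpaddq and the packed FP adds are covered by AVX-512F itself,
// so the int/long/float/double rules need no BW gating or *_special
// forms; plain UseAVX level checks suffice (256-bit integer forms need
// AVX2, i.e. UseAVX > 1, and 512-bit forms need UseAVX > 2).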
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
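
// The subtract rules mirror the add rules one-for-one: psub* for the
// pre-AVX two-operand forms and vpsub* three-operand forms with the
// same avx/evex/evex_special predicate split as above.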
// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
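
// 512-bit short subtracts have no nobw fallback: vpsubw on ZMM registers
// requires AVX512BW outright.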
instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
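
// Note: int and long vector subtracts (above and below) need no BW variants:
// psubd/psubq are SSE2 baseline, and their 512-bit EVEX forms only require
// AVX512F, hence the plain UseAVX > 2 predicates on the vecZ rules.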
// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- MUL --------------------------------------

// Byte vector mul
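// x86 has no byte multiply, so the byte rules widen: sign-extend bytes to
// shorts (pmovsxbw), do a 16x16->16 multiply (pmullw), mask each result back
// to its low byte (pand with the 0x00ff... constant at
// vector_short_to_byte_mask()), and repack (packuswb).
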
instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp2, vecS tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{ "pmovsxbw $tmp,$src1\n\t"
            "pmovsxbw $tmp2,$src2\n\t"
            "pmullw $tmp,$tmp2\n\t"
            "movdqu $tmp2,[0x00ff00ff,0x00ff00ff]\n\t"
            "pand $tmp,$tmp2\n\t"
            "packuswb $tmp,$tmp\n\t"
            "movss $dst,$tmp\t! mul packed4B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp2, vecD tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{ "pmovsxbw $tmp,$src1\n\t"
            "pmovsxbw $tmp2,$src2\n\t"
            "pmullw $tmp,$tmp2\n\t"
            "movdqu $tmp2,[0x00ff00ff,0x00ff00ff]\n\t"
            "pand $tmp,$tmp2\n\t"
            "packuswb $tmp,$tmp\n\t"
            "movsd $dst,$tmp\t! mul packed8B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp3, vecX tmp2, vecX tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP tmp3, TEMP tmp2, TEMP tmp);
  format %{ "pmovsxbw $tmp,$src1\n\t"
            "pmovsxbw $tmp2,$src2\n\t"
            "pmullw $tmp,$tmp2\n\t"
            "pshufd $tmp2,$src1\n\t"
            "pshufd $tmp3,$src2\n\t"
            "pmovsxbw $tmp2,$tmp2\n\t"
            "pmovsxbw $tmp3,$tmp3\n\t"
            "pmullw $tmp2,$tmp3\n\t"
            "movdqu $tmp3,[0x00ff00ff,0x00ff00ff]\n\t"
            "pand $tmp,$tmp3\n\t"
            "pand $tmp2,$tmp3\n\t"
            "packuswb $tmp,$tmp2\n\t"
            "movdqu $dst,$tmp\t! mul packed16B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 238);
    __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 238);
    __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ pmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister);
    __ pmullw($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecY tmp2, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{ "vpmovsxbw $tmp,$src1\n\t"
            "vpmovsxbw $tmp2,$src2\n\t"
            "vpmullw $tmp,$tmp2\n\t"
            "vmovdqu $tmp2,[0x00ff00ff,0x00ff00ff]\n\t"
            "vpand $tmp,$tmp2\n\t"
            "vextracti128_high $tmp2,$tmp\n\t"
            "vpackuswb $dst,$tmp,$tmp2\t! mul packed16B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vmovdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}
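
// pshufd 0xEE (238) above copies the high qword of a source down so its bytes
// can be widened. In the 256-bit rule below, vpackuswb packs within 128-bit
// lanes, so the trailing vpermq 0xD8 (qword order [0,2,1,3]) is needed to
// restore element order.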
instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, vecY tmp3) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti128_high $tmp1,$src1\n\t"
            "vextracti128_high $tmp3,$src2\n\t"
            "vpmovsxbw $tmp1,$tmp1\n\t"
            "vpmovsxbw $tmp3,$tmp3\n\t"
            "vpmullw $tmp1,$tmp1,$tmp3\n\t"
            "vpmovsxbw $tmp2,$src1\n\t"
            "vpmovsxbw $tmp3,$src2\n\t"
            "vpmullw $tmp2,$tmp2,$tmp3\n\t"
            "vmovdqu $tmp3,[0x00ff00ff,0x00ff00ff]\n\t"
            "vpbroadcastd $tmp3,$tmp3\n\t"
            "vpand $tmp2,$tmp2,$tmp3\n\t"
            "vpand $tmp1,$tmp1,$tmp3\n\t"
            "vpackuswb $dst,$tmp2,$tmp1\n\t"
            "vpermq $dst,$dst,0xD8\t! mul packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ vextracti128_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
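// pmulld (32x32->32) arrived with SSE4.1, hence the UseSSE > 3 predicates on
// the non-AVX integer multiply rules.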
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long vector mul
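// No 64x64->64 vector multiply exists below AVX-512DQ, so the rules below
// compose it from 32-bit pieces:
//   lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)
// pshufd 0xB1 (177) swaps the dword halves of each 64-bit lane, pmulld forms
// both cross products, phaddd sums each adjacent pair, psllq 32 positions the
// sum, and pmuludq supplies the unsigned lo*lo product.
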
instruct mul2L_reg(vecX dst, vecX src2, vecX tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && VM_Version::supports_sse4_1());
  match(Set dst (MulVL dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "pshufd $tmp,$src2,177\n\t"
            "pmulld $tmp,$dst\n\t"
            "phaddd $tmp,$tmp\n\t"
            "pmovzxdq $tmp,$tmp\n\t"
            "psllq $tmp,32\n\t"
            "pmuludq $dst,$src2\n\t"
            "paddq $dst,$tmp\t! mul packed2L" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177);
    __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ psllq($tmp$$XMMRegister, 32);
    __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && VM_Version::supports_avx());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp, vecY tmp1) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && VM_Version::supports_avx2());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
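
// vpmullq is the direct 64x64->64 multiply; it is an AVX-512DQ instruction,
// so all vpmullq rules check VM_Version::supports_avx512dq().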
instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
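
// Vector conditional move: vcmpps/vcmppd builds an all-ones/all-zeros mask
// per lane, then vblendvps/vblendvpd selects $src2 where the mask sign bit is
// set and $src1 otherwise. Limited to UseAVX 1..2; AVX-512 targets would
// presumably use opmask registers instead.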
instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "vcmpps.$copnd $dst,$src1,$src2\t! vcmovevf, cond=$cop\n\t"
            "vblendvps $dst,$src1,$src2,$dst\t! vcmovevf" %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "vcmppd.$copnd $dst,$src1,$src2\t! vcmovevd, cond=$cop\n\t"
            "vblendvpd $dst,$src1,$src2,$dst\t! vcmovevd" %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
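
// x86 has no integer vector divide, so only float and double forms follow.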
div packed4F" %} 10193 ins_encode %{ 10194 int vector_len = 0; 10195 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10196 %} 10197 ins_pipe( pipe_slow ); 10198 %} 10199 10200 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 10201 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 10202 match(Set dst (DivVF src1 src2)); 10203 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 10204 ins_encode %{ 10205 int vector_len = 1; 10206 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10207 %} 10208 ins_pipe( pipe_slow ); 10209 %} 10210 10211 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 10212 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 10213 match(Set dst (DivVF src (LoadVector mem))); 10214 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 10215 ins_encode %{ 10216 int vector_len = 1; 10217 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10218 %} 10219 ins_pipe( pipe_slow ); 10220 %} 10221 10222 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10223 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 10224 match(Set dst (DivVF src1 src2)); 10225 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 10226 ins_encode %{ 10227 int vector_len = 2; 10228 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10229 %} 10230 ins_pipe( pipe_slow ); 10231 %} 10232 10233 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 10234 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 10235 match(Set dst (DivVF src (LoadVector mem))); 10236 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 10237 ins_encode %{ 10238 int vector_len = 2; 10239 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10240 %} 10241 ins_pipe( pipe_slow ); 10242 %} 10243 10244 // Doubles vector div 10245 instruct vdiv2D(vecX dst, vecX src) %{ 10246 predicate(n->as_Vector()->length() == 2); 10247 match(Set dst (DivVD dst src)); 10248 format %{ "divpd $dst,$src\t! div packed2D" %} 10249 ins_encode %{ 10250 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 10251 %} 10252 ins_pipe( pipe_slow ); 10253 %} 10254 10255 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 10256 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10257 match(Set dst (DivVD src1 src2)); 10258 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 10259 ins_encode %{ 10260 int vector_len = 0; 10261 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10262 %} 10263 ins_pipe( pipe_slow ); 10264 %} 10265 10266 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 10267 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10268 match(Set dst (DivVD src (LoadVector mem))); 10269 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 10270 ins_encode %{ 10271 int vector_len = 0; 10272 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10273 %} 10274 ins_pipe( pipe_slow ); 10275 %} 10276 10277 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 10278 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10279 match(Set dst (DivVD src1 src2)); 10280 format %{ "vdivpd $dst,$src1,$src2\t! 
div packed4D" %} 10281 ins_encode %{ 10282 int vector_len = 1; 10283 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10284 %} 10285 ins_pipe( pipe_slow ); 10286 %} 10287 10288 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 10289 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10290 match(Set dst (DivVD src (LoadVector mem))); 10291 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 10292 ins_encode %{ 10293 int vector_len = 1; 10294 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10295 %} 10296 ins_pipe( pipe_slow ); 10297 %} 10298 10299 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10300 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 10301 match(Set dst (DivVD src1 src2)); 10302 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 10303 ins_encode %{ 10304 int vector_len = 2; 10305 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10306 %} 10307 ins_pipe( pipe_slow ); 10308 %} 10309 10310 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 10311 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 10312 match(Set dst (DivVD src (LoadVector mem))); 10313 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 10314 ins_encode %{ 10315 int vector_len = 2; 10316 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10317 %} 10318 ins_pipe( pipe_slow ); 10319 %} 10320 10321 // ------------------------------ Min --------------------------------------- 10322 // Byte vector Min 10323 instruct min8B_reg(vecD dst, vecD src1, vecD src2) %{ 10324 predicate(UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10325 match(Set dst (MinV src1 src2)); 10326 effect(TEMP dst); 10327 format %{ "movsd $dst,$src1\n\t" 10328 "pminsb $dst,$src2\t! " %} 10329 ins_encode %{ 10330 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10331 __ pminsb($dst$$XMMRegister, $src2$$XMMRegister); 10332 %} 10333 ins_pipe( pipe_slow ); 10334 %} 10335 10336 instruct min16B_reg(vecX dst, vecX src1, vecX src2) %{ 10337 predicate(UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10338 match(Set dst (MinV src1 src2)); 10339 effect(TEMP dst); 10340 format %{ "movdqu $dst,$src1\n\t" 10341 "pminsb $dst,$src2\t! " %} 10342 ins_encode %{ 10343 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10344 __ pminsb($dst$$XMMRegister, $src2$$XMMRegister); 10345 %} 10346 ins_pipe( pipe_slow ); 10347 %} 10348 10349 instruct min16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10350 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10351 match(Set dst (MinV src1 src2)); 10352 format %{ "vpminsb $dst,$src1,$src2\t! " %} 10353 ins_encode %{ 10354 int vector_len = 0; 10355 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10356 %} 10357 ins_pipe( pipe_slow ); 10358 %} 10359 10360 instruct min16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10361 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10362 match(Set dst (MinV src1 src2)); 10363 format %{ "vpminsb $dst,$src1,$src2\t! 
" %} 10364 ins_encode %{ 10365 int vector_len = 0; 10366 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10367 %} 10368 ins_pipe( pipe_slow ); 10369 %} 10370 10371 instruct min32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10372 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10373 match(Set dst (MinV src1 src2)); 10374 format %{ "vpminsb $dst,$src1,$src2\t! " %} 10375 ins_encode %{ 10376 int vector_len = 1; 10377 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10378 %} 10379 ins_pipe( pipe_slow ); 10380 %} 10381 10382 instruct min32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10383 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10384 match(Set dst (MinV src1 src2)); 10385 format %{ "vpminsb $dst,$src1,$src2\t! " %} 10386 ins_encode %{ 10387 int vector_len = 1; 10388 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10389 %} 10390 ins_pipe( pipe_slow ); 10391 %} 10392 10393 instruct min64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10394 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10395 match(Set dst (MinV src1 src2)); 10396 format %{ "vpminsb $dst,$src1,$src2\t! " %} 10397 ins_encode %{ 10398 int vector_len = 2; 10399 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10400 %} 10401 ins_pipe( pipe_slow ); 10402 %} 10403 10404 //Short vector Min 10405 instruct min4S_reg(vecD dst, vecD src1, vecD src2) %{ 10406 predicate(UseSSE > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10407 match(Set dst (MinV src1 src2)); 10408 effect(TEMP dst); 10409 format %{ "movsd $dst,$src1\n\t" 10410 "pminsw $dst,$src2\t! " %} 10411 ins_encode %{ 10412 __ movsd($src1$$XMMRegister, $src2$$XMMRegister); 10413 __ pminsw($dst$$XMMRegister, $src1$$XMMRegister); 10414 %} 10415 ins_pipe( pipe_slow ); 10416 %} 10417 10418 instruct min8S_reg(vecX dst, vecX src1, vecX src2) %{ 10419 predicate(UseSSE > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10420 match(Set dst (MinV src1 src2)); 10421 effect(TEMP dst); 10422 format %{ "movsd $dst,$src1\n\t" 10423 "pminsw $dst,$src2\t! " %} 10424 ins_encode %{ 10425 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10426 __ pminsw($dst$$XMMRegister, $src2$$XMMRegister); 10427 %} 10428 ins_pipe( pipe_slow ); 10429 %} 10430 10431 instruct min8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10432 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10433 match(Set dst (MinV src1 src2)); 10434 format %{ "vpminsw $dst,$src1,$src2\t! " %} 10435 ins_encode %{ 10436 int vector_len = 0; 10437 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10438 %} 10439 ins_pipe( pipe_slow ); 10440 %} 10441 10442 instruct min8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10443 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10444 match(Set dst (MinV src1 src2)); 10445 format %{ "vpminsw $dst,$src1,$src2\t! 
" %} 10446 ins_encode %{ 10447 int vector_len = 0; 10448 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10449 %} 10450 ins_pipe( pipe_slow ); 10451 %} 10452 10453 instruct min16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10454 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10455 match(Set dst (MinV src1 src2)); 10456 format %{ "vpminsw $dst,$src1,$src2\t! " %} 10457 ins_encode %{ 10458 int vector_len = 1; 10459 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10460 %} 10461 ins_pipe( pipe_slow ); 10462 %} 10463 10464 instruct min16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10465 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10466 match(Set dst (MinV src1 src2)); 10467 format %{ "vpminsw $dst,$src1,$src2\t! " %} 10468 ins_encode %{ 10469 int vector_len = 1; 10470 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10471 %} 10472 ins_pipe( pipe_slow ); 10473 %} 10474 10475 instruct min32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10476 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10477 match(Set dst (MinV src1 src2)); 10478 format %{ "vpminsw $dst,$src1,$src2\t! " %} 10479 ins_encode %{ 10480 int vector_len = 2; 10481 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10482 %} 10483 ins_pipe( pipe_slow ); 10484 %} 10485 10486 // Int vector Min 10487 instruct min2I_reg(vecD dst, vecD src1, vecD src2) %{ 10488 predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10489 match(Set dst (MinV src1 src2)); 10490 effect(TEMP dst); 10491 format %{ "movsd $dst,$src1\n\t" 10492 "pminsd $dst,$src2\t! " %} 10493 ins_encode %{ 10494 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10495 __ pminsd($dst$$XMMRegister, $src2$$XMMRegister); 10496 %} 10497 ins_pipe( pipe_slow ); 10498 %} 10499 10500 instruct min4I_reg(vecX dst, vecX src1, vecX src2) %{ 10501 predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10502 match(Set dst (MinV src1 src2)); 10503 effect(TEMP dst); 10504 format %{ "movdqu $dst,$src1\n\t" 10505 "pminsd $dst,$src2\t! " %} 10506 ins_encode %{ 10507 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10508 __ pminsd($dst$$XMMRegister, $src2$$XMMRegister); 10509 %} 10510 ins_pipe( pipe_slow ); 10511 %} 10512 10513 instruct min4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10514 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10515 match(Set dst (MinV src1 src2)); 10516 format %{ "vpminsd $dst,$src1,$src2\t! " %} 10517 ins_encode %{ 10518 int vector_len = 0; 10519 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10520 %} 10521 ins_pipe( pipe_slow ); 10522 %} 10523 10524 instruct min4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10525 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10526 match(Set dst (MinV src1 src2)); 10527 format %{ "vpminsd $dst,$src1,$src2\t! 
" %} 10528 ins_encode %{ 10529 int vector_len = 0; 10530 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10531 %} 10532 ins_pipe( pipe_slow ); 10533 %} 10534 10535 instruct min8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10536 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10537 match(Set dst (MinV src1 src2)); 10538 format %{ "vpminsd $dst,$src1,$src2\t! " %} 10539 ins_encode %{ 10540 int vector_len = 1; 10541 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10542 %} 10543 ins_pipe( pipe_slow ); 10544 %} 10545 10546 instruct min8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10547 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10548 match(Set dst (MinV src1 src2)); 10549 format %{ "vpminsd $dst,$src1,$src2\t! " %} 10550 ins_encode %{ 10551 int vector_len = 1; 10552 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10553 %} 10554 ins_pipe( pipe_slow ); 10555 %} 10556 10557 instruct min16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10558 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10559 match(Set dst (MinV src1 src2)); 10560 format %{ "vpminsd $dst,$src1,$src2\t! " %} 10561 ins_encode %{ 10562 int vector_len = 2; 10563 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10564 %} 10565 ins_pipe( pipe_slow ); 10566 %} 10567 10568 // Long vector Min 10569 instruct minL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ 10570 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10571 match(Set dst (MinV src1 src2)); 10572 effect(TEMP dst, TEMP tmp); 10573 format %{ "movsd $tmp,$src1\n\t" 10574 "movsd $dst,$src1\n\t" 10575 "pcmpgtq $tmp,$src2\n\t" 10576 "blendvpd $dst,$src2\t! " %} 10577 ins_encode %{ 10578 __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); 10579 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10580 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 10581 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 10582 %} 10583 ins_pipe( pipe_slow ); 10584 %} 10585 10586 instruct min2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ 10587 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10588 match(Set dst (MinV src1 src2)); 10589 effect(TEMP dst, TEMP tmp); 10590 format %{ "movdqu $tmp,$src1\n\t" 10591 "movdqu $dst,$src1\n\t" 10592 "pcmpgtq $tmp,$src2\n\t" 10593 "blendvpd $dst,$src2\t! " %} 10594 ins_encode %{ 10595 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 10596 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10597 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 10598 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 10599 %} 10600 ins_pipe( pipe_slow ); 10601 %} 10602 10603 instruct min2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10604 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10605 match(Set dst (MinV src1 src2)); 10606 effect(TEMP dst); 10607 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 10608 "vblendvpd $dst,$src1,$src2,$dst\t! 
" %} 10609 ins_encode %{ 10610 int vector_len = 0; 10611 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10612 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 10613 %} 10614 ins_pipe( pipe_slow ); 10615 %} 10616 10617 instruct min4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10618 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10619 match(Set dst (MinV src1 src2)); 10620 effect(TEMP dst); 10621 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 10622 "vblendvpd $dst,$src1,$src2,$dst\t! " %} 10623 ins_encode %{ 10624 int vector_len = 1; 10625 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10626 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 10627 %} 10628 ins_pipe( pipe_slow ); 10629 %} 10630 10631 instruct min2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10632 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10633 match(Set dst (MinV src1 src2)); 10634 format %{ "vpminsq $dst,$src1,src2\t! " %} 10635 ins_encode %{ 10636 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 10637 %} 10638 ins_pipe( pipe_slow ); 10639 %} 10640 10641 instruct min4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10642 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10643 match(Set dst (MinV src1 src2)); 10644 format %{ "vpminsq $dst,$src1,src2\t! " %} 10645 ins_encode %{ 10646 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 10647 %} 10648 ins_pipe( pipe_slow ); 10649 %} 10650 10651 instruct min8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10652 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10653 match(Set dst (MinV src1 src2)); 10654 format %{ "vpminsq $dst,$src1,src2\t! " %} 10655 ins_encode %{ 10656 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 10657 %} 10658 ins_pipe( pipe_slow ); 10659 %} 10660 10661 // Float vector Min 10662 instruct min2F_reg(vecD dst, vecD src1, vecD src2) %{ 10663 predicate(UseSSE > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10664 match(Set dst (MinV src1 src2)); 10665 effect(TEMP dst); 10666 format %{ "movsd $dst,$src1\n\t" 10667 "minps $dst,$src2\t! " %} 10668 ins_encode %{ 10669 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10670 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 10671 %} 10672 ins_pipe( pipe_slow ); 10673 %} 10674 10675 instruct min4F_reg(vecX dst, vecX src1, vecX src2) %{ 10676 predicate(UseSSE > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10677 match(Set dst (MinV src1 src2)); 10678 effect(TEMP dst); 10679 format %{ "movdqu $dst,$src1\n\t" 10680 "minps $dst,$src2\t! 
" %} 10681 ins_encode %{ 10682 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10683 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 10684 %} 10685 ins_pipe( pipe_slow ); 10686 %} 10687 10688 instruct min4F_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10689 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10690 match(Set dst (MinV src1 src2)); 10691 format %{ "vminps $dst,$src1,$src2\t! " %} 10692 ins_encode %{ 10693 int vector_len = 0; 10694 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10695 %} 10696 ins_pipe( pipe_slow ); 10697 %} 10698 10699 instruct min4F_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10700 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10701 match(Set dst (MinV src1 src2)); 10702 format %{ "vminps $dst,$src1,$src2\t! " %} 10703 ins_encode %{ 10704 int vector_len = 0; 10705 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10706 %} 10707 ins_pipe( pipe_slow ); 10708 %} 10709 10710 instruct min8F_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10711 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10712 match(Set dst (MinV src1 src2)); 10713 format %{ "vminps $dst,$src1,$src2\t! " %} 10714 ins_encode %{ 10715 int vector_len = 1; 10716 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10717 %} 10718 ins_pipe( pipe_slow ); 10719 %} 10720 10721 instruct min8F_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10722 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10723 match(Set dst (MinV src1 src2)); 10724 format %{ "vminps $dst,$src1,$src2\t! " %} 10725 ins_encode %{ 10726 int vector_len = 1; 10727 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10728 %} 10729 ins_pipe( pipe_slow ); 10730 %} 10731 10732 instruct min16F_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10733 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10734 match(Set dst (MinV src1 src2)); 10735 format %{ "vminps $dst,$src1,$src2\t! " %} 10736 ins_encode %{ 10737 int vector_len = 2; 10738 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10739 %} 10740 ins_pipe( pipe_slow ); 10741 %} 10742 10743 // Double vector Min 10744 instruct minD_reg(vecD dst, vecD src1, vecD src2) %{ 10745 predicate(UseSSE > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10746 match(Set dst (MinV src1 src2)); 10747 effect(TEMP dst); 10748 format %{ "movsd $dst,$src1\n\t" 10749 "minpd $dst,$src2\t! " %} 10750 ins_encode %{ 10751 int vector_len = 0; 10752 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10753 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 10754 %} 10755 ins_pipe( pipe_slow ); 10756 %} 10757 10758 instruct min2D_reg(vecX dst, vecX src1, vecX src2) %{ 10759 predicate(UseSSE > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10760 match(Set dst (MinV src1 src2)); 10761 effect(TEMP dst); 10762 format %{ "movdqu $dst,$src1\n\t" 10763 "minpd $dst,$src2\t! 
" %} 10764 ins_encode %{ 10765 int vector_len = 0; 10766 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10767 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 10768 %} 10769 ins_pipe( pipe_slow ); 10770 %} 10771 10772 instruct min2D_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10773 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10774 match(Set dst (MinV src1 src2)); 10775 format %{ "vminpd $dst,$src1,$src2\t! " %} 10776 ins_encode %{ 10777 int vector_len = 0; 10778 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10779 %} 10780 ins_pipe( pipe_slow ); 10781 %} 10782 10783 instruct min2D_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10784 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10785 match(Set dst (MinV src1 src2)); 10786 format %{ "vminpd $dst,$src1,$src2\t! " %} 10787 ins_encode %{ 10788 int vector_len = 0; 10789 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10790 %} 10791 ins_pipe( pipe_slow ); 10792 %} 10793 10794 instruct min4D_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10795 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10796 match(Set dst (MinV src1 src2)); 10797 format %{ "vminpd $dst,$src1,$src2\t! " %} 10798 ins_encode %{ 10799 int vector_len = 1; 10800 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10801 %} 10802 ins_pipe( pipe_slow ); 10803 %} 10804 10805 instruct min4D_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10806 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10807 match(Set dst (MinV src1 src2)); 10808 format %{ "vminpd $dst,$src1,$src2\t! " %} 10809 ins_encode %{ 10810 int vector_len = 1; 10811 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10812 %} 10813 ins_pipe( pipe_slow ); 10814 %} 10815 10816 instruct min8D_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10817 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10818 match(Set dst (MinV src1 src2)); 10819 format %{ "vminpd $dst,$src1,$src2\t! " %} 10820 ins_encode %{ 10821 int vector_len = 2; 10822 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10823 %} 10824 ins_pipe( pipe_slow ); 10825 %} 10826 10827 // ------------------------------ Max --------------------------------------- 10828 // Byte vector Max 10829 instruct max8B_reg(vecD dst, vecD src1, vecD src2) %{ 10830 predicate(UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10831 match(Set dst (MaxV src1 src2)); 10832 effect(TEMP dst); 10833 format %{ "movsd $dst,$src1\n\t" 10834 "pmaxsb $dst,$src2\t! 
" %} 10835 ins_encode %{ 10836 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10837 __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); 10838 %} 10839 ins_pipe( pipe_slow ); 10840 %} 10841 10842 instruct max16B_reg(vecX dst, vecX src1, vecX src2) %{ 10843 predicate(UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10844 match(Set dst (MaxV src1 src2)); 10845 effect(TEMP dst); 10846 format %{ "movdqu $dst,$src1\n\t" 10847 "pmaxsb $dst,$src2\t! " %} 10848 ins_encode %{ 10849 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10850 __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); 10851 %} 10852 ins_pipe( pipe_slow ); 10853 %} 10854 10855 instruct max16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10856 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10857 match(Set dst (MaxV src1 src2)); 10858 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10859 ins_encode %{ 10860 int vector_len = 0; 10861 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10862 %} 10863 ins_pipe( pipe_slow ); 10864 %} 10865 10866 instruct max16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10867 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10868 match(Set dst (MaxV src1 src2)); 10869 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10870 ins_encode %{ 10871 int vector_len = 0; 10872 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10873 %} 10874 ins_pipe( pipe_slow ); 10875 %} 10876 10877 instruct max32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10878 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10879 match(Set dst (MaxV src1 src2)); 10880 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10881 ins_encode %{ 10882 int vector_len = 1; 10883 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10884 %} 10885 ins_pipe( pipe_slow ); 10886 %} 10887 10888 instruct max32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10889 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10890 match(Set dst (MaxV src1 src2)); 10891 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10892 ins_encode %{ 10893 int vector_len = 1; 10894 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10895 %} 10896 ins_pipe( pipe_slow ); 10897 %} 10898 10899 instruct max64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10900 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10901 match(Set dst (MaxV src1 src2)); 10902 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10903 ins_encode %{ 10904 int vector_len = 2; 10905 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10906 %} 10907 ins_pipe( pipe_slow ); 10908 %} 10909 10910 //Short vector Max 10911 instruct max4S_reg(vecD dst, vecD src1, vecD src2) %{ 10912 predicate(UseSSE > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10913 match(Set dst (MaxV src1 src2)); 10914 effect(TEMP dst); 10915 format %{ "movsd $dst,$src1\n\t" 10916 "pmaxsw $dst,$src2\t! 
" %} 10917 ins_encode %{ 10918 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10919 __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); 10920 %} 10921 ins_pipe( pipe_slow ); 10922 %} 10923 10924 instruct max8S_reg(vecX dst, vecX src1, vecX src2) %{ 10925 predicate(UseSSE > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10926 match(Set dst (MaxV src1 src2)); 10927 effect(TEMP dst); 10928 format %{ "movdqu $dst,$src1\n\t" 10929 "pmaxsw $dst,$src2\t! " %} 10930 ins_encode %{ 10931 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10932 __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); 10933 %} 10934 ins_pipe( pipe_slow ); 10935 %} 10936 10937 instruct max8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10938 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10939 match(Set dst (MaxV src1 src2)); 10940 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10941 ins_encode %{ 10942 int vector_len = 0; 10943 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10944 %} 10945 ins_pipe( pipe_slow ); 10946 %} 10947 10948 instruct max8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10949 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10950 match(Set dst (MaxV src1 src2)); 10951 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10952 ins_encode %{ 10953 int vector_len = 0; 10954 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10955 %} 10956 ins_pipe( pipe_slow ); 10957 %} 10958 10959 instruct max16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10960 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10961 match(Set dst (MaxV src1 src2)); 10962 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10963 ins_encode %{ 10964 int vector_len = 1; 10965 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10966 %} 10967 ins_pipe( pipe_slow ); 10968 %} 10969 10970 instruct max16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10971 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10972 match(Set dst (MaxV src1 src2)); 10973 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10974 ins_encode %{ 10975 int vector_len = 1; 10976 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10977 %} 10978 ins_pipe( pipe_slow ); 10979 %} 10980 10981 instruct max32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10982 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10983 match(Set dst (MaxV src1 src2)); 10984 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10985 ins_encode %{ 10986 int vector_len = 2; 10987 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10988 %} 10989 ins_pipe( pipe_slow ); 10990 %} 10991 10992 // Int vector Max 10993 instruct max2I_reg(vecD dst, vecD src1, vecD src2) %{ 10994 predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10995 match(Set dst (MaxV src1 src2)); 10996 effect(TEMP dst); 10997 format %{ "movdqu $dst,$src1\n\t" 10998 "pmaxsd $dst,$src2\t! 
" %} 10999 ins_encode %{ 11000 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 11001 __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); 11002 %} 11003 ins_pipe( pipe_slow ); 11004 %} 11005 11006 instruct max4I_reg(vecX dst, vecX src1, vecX src2) %{ 11007 predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 11008 match(Set dst (MaxV src1 src2)); 11009 effect(TEMP dst); 11010 format %{ "movdqu $dst,$src1\n\t" 11011 "pmaxsd $dst,$src2\t! " %} 11012 ins_encode %{ 11013 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 11014 __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); 11015 %} 11016 ins_pipe( pipe_slow ); 11017 %} 11018 11019 instruct max4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11020 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 11021 match(Set dst (MaxV src1 src2)); 11022 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 11023 ins_encode %{ 11024 int vector_len = 0; 11025 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11026 %} 11027 ins_pipe( pipe_slow ); 11028 %} 11029 11030 instruct max4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11031 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 11032 match(Set dst (MaxV src1 src2)); 11033 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 11034 ins_encode %{ 11035 int vector_len = 0; 11036 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11037 %} 11038 ins_pipe( pipe_slow ); 11039 %} 11040 11041 instruct max8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11042 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 11043 match(Set dst (MaxV src1 src2)); 11044 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 11045 ins_encode %{ 11046 int vector_len = 1; 11047 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11048 %} 11049 ins_pipe( pipe_slow ); 11050 %} 11051 11052 instruct max8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ 11053 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 11054 match(Set dst (MaxV src1 src2)); 11055 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 11056 ins_encode %{ 11057 int vector_len = 1; 11058 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11059 %} 11060 ins_pipe( pipe_slow ); 11061 %} 11062 11063 instruct max16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 11064 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 11065 match(Set dst (MaxV src1 src2)); 11066 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 11067 ins_encode %{ 11068 int vector_len = 2; 11069 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11070 %} 11071 ins_pipe( pipe_slow ); 11072 %} 11073 11074 // Long Vector Max 11075 instruct maxL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ 11076 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11077 match(Set dst (MaxV src1 src2)); 11078 effect(TEMP dst, TEMP tmp); 11079 format %{ "movsd $tmp,$src1\n\t" 11080 "movsd $dst,$src1\n\t" 11081 "pcmpgtq $tmp,$src2\n\t" 11082 "blendvpd $dst,$src2\t! 
" %} 11083 ins_encode %{ 11084 __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); 11085 __ movsd($dst$$XMMRegister, $src2$$XMMRegister); 11086 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 11087 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister); 11088 %} 11089 ins_pipe( pipe_slow ); 11090 %} 11091 11092 instruct max2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ 11093 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11094 match(Set dst (MaxV src1 src2)); 11095 effect(TEMP dst, TEMP tmp); 11096 format %{ "movdqu $tmp,$src1\n\t" 11097 "movdqu $dst,$src2\n\t" 11098 "pcmpgtq $tmp,$src2\n\t" 11099 "blendvpd $dst,$src1\t! " %} 11100 ins_encode %{ 11101 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 11102 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 11103 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 11104 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 11105 %} 11106 ins_pipe( pipe_slow ); 11107 %} 11108 11109 instruct max2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11110 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11111 match(Set dst (MaxV src1 src2)); 11112 effect(TEMP dst); 11113 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 11114 "vblendvpd $dst,$src2,$src1,$dst\t! " %} 11115 ins_encode %{ 11116 int vector_len = 0; 11117 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11118 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 11119 %} 11120 ins_pipe( pipe_slow ); 11121 %} 11122 11123 instruct max2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11124 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11125 match(Set dst (MaxV src1 src2)); 11126 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 11127 ins_encode %{ 11128 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 11129 %} 11130 ins_pipe( pipe_slow ); 11131 %} 11132 11133 instruct max4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11134 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11135 match(Set dst (MaxV src1 src2)); 11136 effect(TEMP dst); 11137 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 11138 "vblendvpd $dst,$src2,$src1,$dst\t! " %} 11139 ins_encode %{ 11140 int vector_len = 1; 11141 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11142 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 11143 %} 11144 ins_pipe( pipe_slow ); 11145 %} 11146 11147 instruct max4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ 11148 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11149 match(Set dst (MaxV src1 src2)); 11150 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 11151 ins_encode %{ 11152 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 11153 %} 11154 ins_pipe( pipe_slow ); 11155 %} 11156 11157 instruct max8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 11158 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11159 match(Set dst (MaxV src1 src2)); 11160 format %{ "vpmaxsq $dst,$src1,src2\t! 
" %} 11161 ins_encode %{ 11162 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2); 11163 %} 11164 ins_pipe( pipe_slow ); 11165 %} 11166 11167 // Float Vector Max 11168 instruct max2F_reg(vecD dst, vecD src1, vecD src2) %{ 11169 predicate(UseSSE > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11170 match(Set dst (MaxV src1 src2)); 11171 effect(TEMP dst); 11172 format %{ "movsd $dst,$src1\n\t" 11173 "maxps $dst,$src2\t! " %} 11174 ins_encode %{ 11175 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 11176 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 11177 %} 11178 ins_pipe( pipe_slow ); 11179 %} 11180 11181 instruct max4F_reg(vecX dst, vecX src1, vecX src2) %{ 11182 predicate(UseSSE > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11183 match(Set dst (MaxV src1 src2)); 11184 effect(TEMP dst); 11185 format %{ "movdqu $dst,$src1\n\t" 11186 "maxps $dst,$src2\t! " %} 11187 ins_encode %{ 11188 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 11189 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 11190 %} 11191 ins_pipe( pipe_slow ); 11192 %} 11193 11194 instruct max4F_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11195 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11196 match(Set dst (MaxV src1 src2)); 11197 format %{ "vmaxps $dst,$src1,$src2\t! " %} 11198 ins_encode %{ 11199 int vector_len = 0; 11200 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11201 %} 11202 ins_pipe( pipe_slow ); 11203 %} 11204 11205 instruct max4F_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11206 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11207 match(Set dst (MaxV src1 src2)); 11208 format %{ "vmaxps $dst,$src1,$src2\t! " %} 11209 ins_encode %{ 11210 int vector_len = 0; 11211 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11212 %} 11213 ins_pipe( pipe_slow ); 11214 %} 11215 11216 instruct max8F_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11217 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11218 match(Set dst (MaxV src1 src2)); 11219 format %{ "vmaxps $dst,$src1,$src2\t! " %} 11220 ins_encode %{ 11221 int vector_len = 1; 11222 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11223 %} 11224 ins_pipe( pipe_slow ); 11225 %} 11226 11227 instruct max8F_reg_evex(vecY dst, vecY src1, vecY src2) %{ 11228 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11229 match(Set dst (MaxV src1 src2)); 11230 format %{ "vmaxps $dst,$src1,$src2\t! " %} 11231 ins_encode %{ 11232 int vector_len = 1; 11233 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11234 %} 11235 ins_pipe( pipe_slow ); 11236 %} 11237 11238 instruct max16F_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 11239 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11240 match(Set dst (MaxV src1 src2)); 11241 format %{ "vmaxps $dst,$src1,$src2\t! 
" %} 11242 ins_encode %{ 11243 int vector_len = 2; 11244 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11245 %} 11246 ins_pipe( pipe_slow ); 11247 %} 11248 11249 // Double Vector Max 11250 instruct maxD_reg(vecD dst, vecD src1, vecD src2) %{ 11251 predicate(UseSSE > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11252 match(Set dst (MaxV src1 src2)); 11253 effect(TEMP dst); 11254 format %{ "movsd $dst,$src1\n\t" 11255 "maxpd $dst,$src2\t! " %} 11256 ins_encode %{ 11257 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 11258 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 11259 %} 11260 ins_pipe( pipe_slow ); 11261 %} 11262 11263 instruct max2D_reg(vecX dst, vecX src1, vecX src2) %{ 11264 predicate(UseSSE > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11265 match(Set dst (MaxV src1 src2)); 11266 effect(TEMP dst); 11267 format %{ "movdqu $dst,$src1\n\t" 11268 "maxpd $dst,$src2\t! " %} 11269 ins_encode %{ 11270 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 11271 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 11272 %} 11273 ins_pipe( pipe_slow ); 11274 %} 11275 11276 instruct max2D_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11277 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11278 match(Set dst (MaxV src1 src2)); 11279 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 11280 ins_encode %{ 11281 int vector_len = 0; 11282 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11283 %} 11284 ins_pipe( pipe_slow ); 11285 %} 11286 11287 instruct max2D_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11288 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11289 match(Set dst (MaxV src1 src2)); 11290 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 11291 ins_encode %{ 11292 int vector_len = 0; 11293 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11294 %} 11295 ins_pipe( pipe_slow ); 11296 %} 11297 11298 instruct max4D_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11299 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11300 match(Set dst (MaxV src1 src2)); 11301 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 11302 ins_encode %{ 11303 int vector_len = 1; 11304 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11305 %} 11306 ins_pipe( pipe_slow ); 11307 %} 11308 11309 instruct max4D_reg_evex(vecY dst, vecY src1, vecY src2) %{ 11310 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11311 match(Set dst (MaxV src1 src2)); 11312 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 11313 ins_encode %{ 11314 int vector_len = 1; 11315 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11316 %} 11317 ins_pipe( pipe_slow ); 11318 %} 11319 11320 instruct max8D_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 11321 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11322 match(Set dst (MaxV src1 src2)); 11323 format %{ "vmaxpd $dst,$src1,$src2\t! 
" %} 11324 ins_encode %{ 11325 int vector_len = 2; 11326 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11327 %} 11328 ins_pipe( pipe_slow ); 11329 %} 11330 11331 // ------------------------------ Shift --------------------------------------- 11332 11333 // Left and right shift count vectors are the same on x86 11334 // (only lowest bits of xmm reg are used for count). 11335 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 11336 match(Set dst (LShiftCntV cnt)); 11337 match(Set dst (RShiftCntV cnt)); 11338 format %{ "movd $dst,$cnt\t! load shift count" %} 11339 ins_encode %{ 11340 __ movdl($dst$$XMMRegister, $cnt$$Register); 11341 %} 11342 ins_pipe( pipe_slow ); 11343 %} 11344 11345 // --------------------------------- Sqrt -------------------------------------- 11346 11347 // Floating point vector sqrt 11348 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 11349 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11350 match(Set dst (SqrtVD src)); 11351 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 11352 ins_encode %{ 11353 int vector_len = 0; 11354 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11355 %} 11356 ins_pipe( pipe_slow ); 11357 %} 11358 11359 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 11360 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11361 match(Set dst (SqrtVD (LoadVector mem))); 11362 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 11363 ins_encode %{ 11364 int vector_len = 0; 11365 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 11366 %} 11367 ins_pipe( pipe_slow ); 11368 %} 11369 11370 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 11371 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11372 match(Set dst (SqrtVD src)); 11373 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 11374 ins_encode %{ 11375 int vector_len = 1; 11376 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11377 %} 11378 ins_pipe( pipe_slow ); 11379 %} 11380 11381 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 11382 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11383 match(Set dst (SqrtVD (LoadVector mem))); 11384 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 11385 ins_encode %{ 11386 int vector_len = 1; 11387 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 11388 %} 11389 ins_pipe( pipe_slow ); 11390 %} 11391 11392 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 11393 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 11394 match(Set dst (SqrtVD src)); 11395 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 11396 ins_encode %{ 11397 int vector_len = 2; 11398 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11399 %} 11400 ins_pipe( pipe_slow ); 11401 %} 11402 11403 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 11404 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 11405 match(Set dst (SqrtVD (LoadVector mem))); 11406 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 11407 ins_encode %{ 11408 int vector_len = 2; 11409 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 11410 %} 11411 ins_pipe( pipe_slow ); 11412 %} 11413 11414 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 11415 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11416 match(Set dst (SqrtVF src)); 11417 format %{ "vsqrtps $dst,$src\t! 
sqrt packed2F" %} 11418 ins_encode %{ 11419 int vector_len = 0; 11420 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11421 %} 11422 ins_pipe( pipe_slow ); 11423 %} 11424 11425 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 11426 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11427 match(Set dst (SqrtVF (LoadVector mem))); 11428 format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %} 11429 ins_encode %{ 11430 int vector_len = 0; 11431 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 11432 %} 11433 ins_pipe( pipe_slow ); 11434 %} 11435 11436 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 11437 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11438 match(Set dst (SqrtVF src)); 11439 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 11440 ins_encode %{ 11441 int vector_len = 0; 11442 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11443 %} 11444 ins_pipe( pipe_slow ); 11445 %} 11446 11447 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 11448 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11449 match(Set dst (SqrtVF (LoadVector mem))); 11450 format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} 11451 ins_encode %{ 11452 int vector_len = 0; 11453 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 11454 %} 11455 ins_pipe( pipe_slow ); 11456 %} 11457 11458 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 11459 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 11460 match(Set dst (SqrtVF src)); 11461 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 11462 ins_encode %{ 11463 int vector_len = 1; 11464 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11465 %} 11466 ins_pipe( pipe_slow ); 11467 %} 11468 11469 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 11470 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 11471 match(Set dst (SqrtVF (LoadVector mem))); 11472 format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %} 11473 ins_encode %{ 11474 int vector_len = 1; 11475 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 11476 %} 11477 ins_pipe( pipe_slow ); 11478 %} 11479 11480 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 11481 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 11482 match(Set dst (SqrtVF src)); 11483 format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %} 11484 ins_encode %{ 11485 int vector_len = 2; 11486 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11487 %} 11488 ins_pipe( pipe_slow ); 11489 %} 11490 11491 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 11492 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 11493 match(Set dst (SqrtVF (LoadVector mem))); 11494 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 11495 ins_encode %{ 11496 int vector_len = 2; 11497 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 11498 %} 11499 ins_pipe( pipe_slow ); 11500 %} 11501 11502 // ------------------------------ LeftShift ----------------------------------- 11503 11504 // Shorts/Chars vector left shift 11505 instruct vsll2S(vecS dst, vecS shift) %{ 11506 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 11507 match(Set dst (LShiftVS dst shift)); 11508 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 11509 ins_encode %{ 11510 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 11511 %} 11512 ins_pipe( pipe_slow ); 11513 %} 11514 11515 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 11516 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 11517 match(Set dst (LShiftVS dst shift)); 11518 format %{ "psllw $dst,$shift\t! 
left shift packed2S" %} 11519 ins_encode %{ 11520 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 11521 %} 11522 ins_pipe( pipe_slow ); 11523 %} 11524 11525 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 11526 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 11527 match(Set dst (LShiftVS src shift)); 11528 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 11529 ins_encode %{ 11530 int vector_len = 0; 11531 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11532 %} 11533 ins_pipe( pipe_slow ); 11534 %} 11535 11536 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 11537 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 11538 match(Set dst (LShiftVS src shift)); 11539 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 11540 ins_encode %{ 11541 int vector_len = 0; 11542 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11543 %} 11544 ins_pipe( pipe_slow ); 11545 %} 11546 11547 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 11548 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 11549 match(Set dst (LShiftVS dst shift)); 11550 effect(TEMP src); 11551 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 11552 ins_encode %{ 11553 int vector_len = 0; 11554 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11555 %} 11556 ins_pipe( pipe_slow ); 11557 %} 11558 11559 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 11560 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 11561 match(Set dst (LShiftVS src shift)); 11562 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 11563 ins_encode %{ 11564 int vector_len = 0; 11565 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11566 %} 11567 ins_pipe( pipe_slow ); 11568 %} 11569 11570 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 11571 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 11572 match(Set dst (LShiftVS src shift)); 11573 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 11574 ins_encode %{ 11575 int vector_len = 0; 11576 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11577 %} 11578 ins_pipe( pipe_slow ); 11579 %} 11580 11581 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 11582 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 11583 match(Set dst (LShiftVS dst shift)); 11584 effect(TEMP src); 11585 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 11586 ins_encode %{ 11587 int vector_len = 0; 11588 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11589 %} 11590 ins_pipe( pipe_slow ); 11591 %} 11592 11593 instruct vsll4S(vecD dst, vecS shift) %{ 11594 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 11595 match(Set dst (LShiftVS dst shift)); 11596 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 11597 ins_encode %{ 11598 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 11599 %} 11600 ins_pipe( pipe_slow ); 11601 %} 11602 11603 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 11604 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 11605 match(Set dst (LShiftVS dst shift)); 11606 format %{ "psllw $dst,$shift\t! 
left shift packed4S" %} 11607 ins_encode %{ 11608 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 11609 %} 11610 ins_pipe( pipe_slow ); 11611 %} 11612 11613 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 11614 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 11615 match(Set dst (LShiftVS src shift)); 11616 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 11617 ins_encode %{ 11618 int vector_len = 0; 11619 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11620 %} 11621 ins_pipe( pipe_slow ); 11622 %} 11623 11624 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 11625 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 11626 match(Set dst (LShiftVS src shift)); 11627 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 11628 ins_encode %{ 11629 int vector_len = 0; 11630 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11631 %} 11632 ins_pipe( pipe_slow ); 11633 %} 11634 11635 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 11636 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 11637 match(Set dst (LShiftVS dst shift)); 11638 effect(TEMP src); 11639 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 11640 ins_encode %{ 11641 int vector_len = 0; 11642 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11643 %} 11644 ins_pipe( pipe_slow ); 11645 %} 11646 11647 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 11648 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 11649 match(Set dst (LShiftVS src shift)); 11650 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 11651 ins_encode %{ 11652 int vector_len = 0; 11653 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11654 %} 11655 ins_pipe( pipe_slow ); 11656 %} 11657 11658 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 11659 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 11660 match(Set dst (LShiftVS src shift)); 11661 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 11662 ins_encode %{ 11663 int vector_len = 0; 11664 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11665 %} 11666 ins_pipe( pipe_slow ); 11667 %} 11668 11669 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 11670 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 11671 match(Set dst (LShiftVS dst shift)); 11672 effect(TEMP src); 11673 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 11674 ins_encode %{ 11675 int vector_len = 0; 11676 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11677 %} 11678 ins_pipe( pipe_slow ); 11679 %} 11680 11681 instruct vsll8S(vecX dst, vecS shift) %{ 11682 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 11683 match(Set dst (LShiftVS dst shift)); 11684 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 11685 ins_encode %{ 11686 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 11687 %} 11688 ins_pipe( pipe_slow ); 11689 %} 11690 11691 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 11692 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 11693 match(Set dst (LShiftVS dst shift)); 11694 format %{ "psllw $dst,$shift\t! 
left shift packed8S" %} 11695 ins_encode %{ 11696 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 11697 %} 11698 ins_pipe( pipe_slow ); 11699 %} 11700 11701 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 11702 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 11703 match(Set dst (LShiftVS src shift)); 11704 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 11705 ins_encode %{ 11706 int vector_len = 0; 11707 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11708 %} 11709 ins_pipe( pipe_slow ); 11710 %} 11711 11712 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 11713 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 11714 match(Set dst (LShiftVS src shift)); 11715 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 11716 ins_encode %{ 11717 int vector_len = 0; 11718 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11719 %} 11720 ins_pipe( pipe_slow ); 11721 %} 11722 11723 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 11724 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 11725 match(Set dst (LShiftVS dst shift)); 11726 effect(TEMP src); 11727 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 11728 ins_encode %{ 11729 int vector_len = 0; 11730 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11731 %} 11732 ins_pipe( pipe_slow ); 11733 %} 11734 11735 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 11736 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 11737 match(Set dst (LShiftVS src shift)); 11738 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 11739 ins_encode %{ 11740 int vector_len = 0; 11741 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11742 %} 11743 ins_pipe( pipe_slow ); 11744 %} 11745 11746 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 11747 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 11748 match(Set dst (LShiftVS src shift)); 11749 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 11750 ins_encode %{ 11751 int vector_len = 0; 11752 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11753 %} 11754 ins_pipe( pipe_slow ); 11755 %} 11756 11757 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 11758 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 11759 match(Set dst (LShiftVS dst shift)); 11760 effect(TEMP src); 11761 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 11762 ins_encode %{ 11763 int vector_len = 0; 11764 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11765 %} 11766 ins_pipe( pipe_slow ); 11767 %} 11768 11769 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 11770 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 11771 match(Set dst (LShiftVS src shift)); 11772 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 11773 ins_encode %{ 11774 int vector_len = 1; 11775 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11776 %} 11777 ins_pipe( pipe_slow ); 11778 %} 11779 11780 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 11781 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 11782 match(Set dst (LShiftVS src shift)); 11783 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 11784 ins_encode %{ 11785 int vector_len = 1; 11786 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11787 %} 11788 ins_pipe( pipe_slow ); 11789 %} 11790 11791 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 11792 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 11793 match(Set dst (LShiftVS dst shift)); 11794 effect(TEMP src); 11795 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 11796 ins_encode %{ 11797 int vector_len = 1; 11798 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11799 %} 11800 ins_pipe( pipe_slow ); 11801 %} 11802 11803 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 11804 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 11805 match(Set dst (LShiftVS src shift)); 11806 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 11807 ins_encode %{ 11808 int vector_len = 1; 11809 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11810 %} 11811 ins_pipe( pipe_slow ); 11812 %} 11813 11814 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 11815 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 11816 match(Set dst (LShiftVS src shift)); 11817 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 11818 ins_encode %{ 11819 int vector_len = 1; 11820 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11821 %} 11822 ins_pipe( pipe_slow ); 11823 %} 11824 11825 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 11826 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 11827 match(Set dst (LShiftVS dst shift)); 11828 effect(TEMP src); 11829 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 11830 ins_encode %{ 11831 int vector_len = 1; 11832 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11833 %} 11834 ins_pipe( pipe_slow ); 11835 %} 11836 11837 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 11838 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 11839 match(Set dst (LShiftVS src shift)); 11840 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 11841 ins_encode %{ 11842 int vector_len = 2; 11843 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11844 %} 11845 ins_pipe( pipe_slow ); 11846 %} 11847 11848 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 11849 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 11850 match(Set dst (LShiftVS src shift)); 11851 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed32S" %} 11852 ins_encode %{ 11853 int vector_len = 2; 11854 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11855 %} 11856 ins_pipe( pipe_slow ); 11857 %} 11858 11859 // Integers vector left shift 11860 instruct vsll2I(vecD dst, vecS shift) %{ 11861 predicate(n->as_Vector()->length() == 2); 11862 match(Set dst (LShiftVI dst shift)); 11863 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 11864 ins_encode %{ 11865 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 11866 %} 11867 ins_pipe( pipe_slow ); 11868 %} 11869 11870 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 11871 predicate(n->as_Vector()->length() == 2); 11872 match(Set dst (LShiftVI dst shift)); 11873 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 11874 ins_encode %{ 11875 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 11876 %} 11877 ins_pipe( pipe_slow ); 11878 %} 11879 11880 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 11881 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11882 match(Set dst (LShiftVI src shift)); 11883 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 11884 ins_encode %{ 11885 int vector_len = 0; 11886 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11887 %} 11888 ins_pipe( pipe_slow ); 11889 %} 11890 11891 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 11892 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11893 match(Set dst (LShiftVI src shift)); 11894 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 11895 ins_encode %{ 11896 int vector_len = 0; 11897 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11898 %} 11899 ins_pipe( pipe_slow ); 11900 %} 11901 11902 instruct vsll4I(vecX dst, vecS shift) %{ 11903 predicate(n->as_Vector()->length() == 4); 11904 match(Set dst (LShiftVI dst shift)); 11905 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 11906 ins_encode %{ 11907 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 11908 %} 11909 ins_pipe( pipe_slow ); 11910 %} 11911 11912 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 11913 predicate(n->as_Vector()->length() == 4); 11914 match(Set dst (LShiftVI dst shift)); 11915 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 11916 ins_encode %{ 11917 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 11918 %} 11919 ins_pipe( pipe_slow ); 11920 %} 11921 11922 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 11923 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11924 match(Set dst (LShiftVI src shift)); 11925 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 11926 ins_encode %{ 11927 int vector_len = 0; 11928 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11929 %} 11930 ins_pipe( pipe_slow ); 11931 %} 11932 11933 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 11934 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11935 match(Set dst (LShiftVI src shift)); 11936 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 11937 ins_encode %{ 11938 int vector_len = 0; 11939 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11940 %} 11941 ins_pipe( pipe_slow ); 11942 %} 11943 11944 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 11945 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 11946 match(Set dst (LShiftVI src shift)); 11947 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed8I" %} 11948 ins_encode %{ 11949 int vector_len = 1; 11950 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11951 %} 11952 ins_pipe( pipe_slow ); 11953 %} 11954 11955 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 11956 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 11957 match(Set dst (LShiftVI src shift)); 11958 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 11959 ins_encode %{ 11960 int vector_len = 1; 11961 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11962 %} 11963 ins_pipe( pipe_slow ); 11964 %} 11965 11966 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 11967 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 11968 match(Set dst (LShiftVI src shift)); 11969 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 11970 ins_encode %{ 11971 int vector_len = 2; 11972 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 11973 %} 11974 ins_pipe( pipe_slow ); 11975 %} 11976 11977 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 11978 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 11979 match(Set dst (LShiftVI src shift)); 11980 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 11981 ins_encode %{ 11982 int vector_len = 2; 11983 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 11984 %} 11985 ins_pipe( pipe_slow ); 11986 %} 11987 11988 // Longs vector left shift 11989 instruct vsll2L(vecX dst, vecS shift) %{ 11990 predicate(n->as_Vector()->length() == 2); 11991 match(Set dst (LShiftVL dst shift)); 11992 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 11993 ins_encode %{ 11994 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 11995 %} 11996 ins_pipe( pipe_slow ); 11997 %} 11998 11999 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 12000 predicate(n->as_Vector()->length() == 2); 12001 match(Set dst (LShiftVL dst shift)); 12002 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 12003 ins_encode %{ 12004 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 12005 %} 12006 ins_pipe( pipe_slow ); 12007 %} 12008 12009 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 12010 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12011 match(Set dst (LShiftVL src shift)); 12012 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 12013 ins_encode %{ 12014 int vector_len = 0; 12015 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 12016 %} 12017 ins_pipe( pipe_slow ); 12018 %} 12019 12020 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 12021 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12022 match(Set dst (LShiftVL src shift)); 12023 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 12024 ins_encode %{ 12025 int vector_len = 0; 12026 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 12027 %} 12028 ins_pipe( pipe_slow ); 12029 %} 12030 12031 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 12032 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 12033 match(Set dst (LShiftVL src shift)); 12034 format %{ "vpsllq $dst,$src,$shift\t! 

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts a short value into an int with
// sign extension before the shift. But char vectors are fine since chars are
// unsigned values.
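
// Illustrative example (editorial, C semantics mirroring Java's): for a
// negative short, Java's ">>>" first sign-extends to 32 bits, so its result
// differs from what a 16-bit psrlw lane computes, even in the low 16 bits:
//
//   short s = -2;                                  // 0xFFFE in 16 bits
//   int via_java  = (int)((unsigned)(int)s >> 1);  // Java "s >>> 1":
//                                                  // sign-extend, then shift
//                                                  // => 0x7FFFFFFF
//   int via_psrlw = (s & 0xFFFF) >> 1;             // one 16-bit lane => 0x7FFF
//   // The low 16 bits are 0xFFFF vs 0x7FFF, so the URShiftVS rules below are
//   // only safe for char (unsigned) data, where both routes agree.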

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no vector arithmetic right shift instructions for longs.
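
// Editorial note: pre-EVEX SSE/AVX provide psraw/psrad but no 64-bit psraq
// (EVEX targets add vpsraq), so a packed arithmetic right shift of longs
// would have to be synthesized. A scalar sketch of one classic emulation
// using only logical shifts, assuming 0 < n < 64 (illustration only, not
// code generated by these rules):
//
//   int64_t sra64(int64_t x, int n) {
//     uint64_t logical = (uint64_t)x >> n;        // logical right shift
//     uint64_t sign    = (uint64_t)1 << (63 - n); // landing spot of sign bit
//     return (int64_t)((logical ^ sign) - sign);  // sign-extend from bit 63-n
//   }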
// ------------------- Variable Bit Shift Left Logical -----------------------------
// Integer variable left shift
instruct vsllv2I(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv4I_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv8I_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv8I_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
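// The Op_LShiftCntV guard keeps these rules from overlapping the uniform
// shift rules above: a count produced by LShiftCntV is a broadcast of one
// scalar and is matched there, while a genuinely per-lane count vector
// (e.g. a superword-vectorized a[i] = b[i] << c[i]) is matched here and
// emitted as VPSLLVD, which takes an independent count from each element
// of $shift.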
// Long variable left shift
instruct vsllv1L_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv2L_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv4L_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv4L_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
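// VPSLLVD and VPSLLVQ are well-defined for oversized counts: a lane whose
// count is >= the element width produces 0 instead of wrapping the count the
// way the scalar shift instructions do. Java's << masks the count (& 31 or
// & 63), so that masking is expected to have happened in the IR before these
// rules match.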
// ------------------- Variable Bit Shift Right Logical -----------------------------
// Integer variable right shift
instruct vsrlv2I_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv4I_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv8I_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv8I_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
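// URShiftVI (Java's >>>) maps to VPSRLVD, which zero-fills from the left;
// the sign-propagating RShiftVI (>>) is handled by VPSRAVD in the arithmetic
// section below. The two IR opcodes are never interchangeable, so each gets
// its own set of rules.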
// Long variable right shift
instruct vsrlv1L_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv2L_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv4L_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv4L_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv8L_reg(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- Variable Bit Shift Right Arithmetic -----------------------------
// Integer variable right shift
instruct vsrav2I_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav4I_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav8I_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav8I_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
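// Note: unlike VPSRAVD, VPSRAVQ has no VEX form; it exists only as an EVEX
// (AVX-512) encoding, with AVX512VL covering the 128- and 256-bit widths.
// The long rules below therefore emit EVEX instructions even in their
// non-_evex variants.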
// Long variable right shift arithmetic
instruct vsrav1L_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav2L_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav4L_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav4L_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav8L_reg(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
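// The bitwise AND/OR/XOR rules below are keyed on length_in_bytes() rather
// than element count because PAND/POR/PXOR are element-type agnostic: one
// rule per vector width covers every element type. The plain SSE forms are
// destructive (dst = dst op src, hence the match on (AndV dst src)), the
// AVX forms are three-operand, and the _mem forms fold the load of the
// second operand into the instruction.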
// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
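// --------------------------------- VectorCast --------------------------------
// Widening casts below sign-extend with VPMOVSX*. Casts to floating point go
// through dword lanes first (VPMOVSXBD/VPMOVSXWD followed by
// VCVTDQ2PS/VCVTDQ2PD), since the converters only accept packed 32-bit
// integers as input.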
instruct vcvt4Bto4S_reg(vecD dst, vecS src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbw $dst,$src\t! convert 4B to 4S vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Bto8S_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbw $dst,$src\t! convert 8B to 8S vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt16Bto16S_reg(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbw $dst,$src\t! convert 16B to 16S vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt32Bto32S_reg(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbw $dst,$src\t! convert 32B to 32S vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Bto4I_reg(vecX dst, vecS src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbd $dst,$src\t! convert 4B to 4I vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Bto8I_reg(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbd $dst,$src\t! convert 8B to 8I vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt16Bto16I_reg(vecZ dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbd $dst,$src\t! convert 16B to 16I vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Bto4L_reg(vecY dst, vecS src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbq $dst,$src\t! convert 4B to 4L vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Bto8L_reg(vecZ dst, vecD src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbq $dst,$src\t! convert 8B to 8L vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Bto4F_reg(vecX dst, vecS src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbd $dst,$src\n\t"
            "vcvtdq2ps $dst,$dst\t! convert 4B to 4F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Bto8F_reg(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbd $dst,$src\n\t"
            "vcvtdq2ps $dst,$dst\t! convert 8B to 8F vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt16Bto16F_reg(vecZ dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbd $dst,$src\n\t"
            "vcvtdq2ps $dst,$dst\t! convert 16B to 16F vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Bto4D_reg(vecY dst, vecS src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbd $dst,$src\n\t"
            "vcvtdq2pd $dst,$dst\t! convert 4B to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Bto8D_reg(vecZ dst, vecD src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vpmovsxbd $dst,$src\n\t"
            "vcvtdq2pd $dst,$dst\t! convert 8B to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
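// Narrowing short-to-byte has no dedicated instruction before AVX-512's
// EVPMOVWB, so the AVX rules below mask every word to 0x00FF and then
// VPACKUSWB the result with itself: with the high byte already clear, the
// unsigned-saturating pack degenerates into a plain truncation.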
instruct vcvt4Sto4B_reg(vecS dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP scratch);
  match(Set dst (VectorCastS2X src));
  format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t"
            "vpackuswb $dst,$dst\t! convert 4S to 4B vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Sto8B_reg(vecD dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP scratch);
  match(Set dst (VectorCastS2X src));
  format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t"
            "vpackuswb $dst,$dst\t! convert 8S to 8B vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt16Sto16B_reg(vecX dst, vecY src, vecY tmp, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP scratch, TEMP tmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t"
            "vextracti128 $tmp,$dst,0x1\n\t"
            "vpackuswb $dst,$dst,$tmp\t! convert 16S to 16B vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    // The two 128-bit halves now hold 8 masked words each; pack in 128-bit mode.
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt32Sto32B_reg(vecY dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "evpmovwb $dst,$src\t! convert 32S to 32B vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Sto2I_reg(vecD dst, vecS src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\t! convert 2S to 2I vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Sto4I_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\t! convert 4S to 4I vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Sto8I_reg(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\t! convert 8S to 8I vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt16Sto16I_reg(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\t! convert 16S to 16I vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Sto2L_reg(vecX dst, vecS src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwq $dst,$src\t! convert 2S to 2L vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Sto4L_reg(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwq $dst,$src\t! convert 4S to 4L vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Sto8L_reg(vecZ dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwq $dst,$src\t! convert 8S to 8L vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Sto2F_reg(vecD dst, vecS src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2ps $dst,$dst\t! convert 2S to 2F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Sto4F_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2ps $dst,$dst\t! convert 4S to 4F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Sto8F_reg(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2ps $dst,$dst\t! convert 8S to 8F vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt16Sto16F_reg(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2ps $dst,$dst\t! convert 16S to 16F vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Sto2D_reg(vecX dst, vecS src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2pd $dst,$dst\t! convert 2S to 2D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Sto4D_reg(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2pd $dst,$dst\t! convert 4S to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Sto8D_reg(vecZ dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2pd $dst,$dst\t! convert 8S to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
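// Int narrowing follows the same recipe one element size up: mask plus
// VPACKUSDW (and VPACKUSWB for int-to-byte) on AVX/AVX2, direct
// EVPMOVDB/EVPMOVDW truncation on AVX-512, VPMOVSXDQ for widening to long,
// and VCVTDQ2PS/VCVTDQ2PD for int to floating point, which need no
// pre-extension because the source lanes are already dwords.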
convert 4S to 4D vector" %} 14337 ins_encode %{ 14338 int vector_len = 1; 14339 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14340 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 14341 %} 14342 ins_pipe( pipe_slow ); 14343 %} 14344 14345 instruct vcvt8Sto8D_reg(vecZ dst, vecX src) %{ 14346 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14347 match(Set dst (VectorCastS2X src)); 14348 format %{ "vpmovsxwd $dst,$src\n\t" 14349 "vcvtdq2pd $dst,$dst\t! convert 8S to 8D vector" %} 14350 ins_encode %{ 14351 int vector_len = 2; 14352 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14353 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 14354 %} 14355 ins_pipe( pipe_slow ); 14356 %} 14357 14358 instruct vcvt4Ito4B_reg(vecS dst, vecX src, rRegL scratch) %{ 14359 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14360 effect(TEMP scratch); 14361 match(Set dst (VectorCastI2X src)); 14362 format %{ "vpand $dst,$src,[0x000000FF000000FF]\n\t" 14363 "vpackusdw $dst,$dst\n\t" 14364 "vpackuswb $dst,$dst\t! convert 4I to 4B vector" %} 14365 ins_encode %{ 14366 int vector_len = 0; 14367 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register); 14368 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 14369 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 14370 %} 14371 ins_pipe( pipe_slow ); 14372 %} 14373 14374 instruct vcvt8Ito8B_reg(vecD dst, vecY src, vecY tmp, rRegL scratch) %{ 14375 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14376 effect(TEMP scratch, TEMP tmp); 14377 match(Set dst (VectorCastI2X src)); 14378 format %{ "vpand $dst,$src,[0x000000FF000000FF]\n\t" 14379 "vextracti128 $tmp,$dst,0x1\n\t" 14380 "vpackusdw $dst,$dst,$tmp\n\t" 14381 "vpackuswb $dst,$dst\t! convert 8I to 8B vector" %} 14382 ins_encode %{ 14383 int vector_len = 1; 14384 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register); 14385 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 14386 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 14387 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 14388 %} 14389 ins_pipe( pipe_slow ); 14390 %} 14391 14392 instruct vcvt16Ito16B_reg(vecX dst, vecZ src) %{ 14393 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14394 match(Set dst (VectorCastI2X src)); 14395 format %{ "evpmovdb $dst,$src\t! convert 16I to 16B vector" %} 14396 ins_encode %{ 14397 int vector_len = 2; 14398 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14399 %} 14400 ins_pipe( pipe_slow ); 14401 %} 14402 14403 instruct vcvt2Ito2S_reg(vecS dst, vecD src, rRegL scratch) %{ 14404 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14405 effect(TEMP scratch); 14406 match(Set dst (VectorCastI2X src)); 14407 format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" 14408 "vpackusdw $dst,$dst\t! 
convert 2I to 2S vector" %} 14409 ins_encode %{ 14410 int vector_len = 0; 14411 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 14412 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 14413 %} 14414 ins_pipe( pipe_slow ); 14415 %} 14416 14417 instruct vcvt4Ito4S_reg(vecD dst, vecX src, rRegL scratch) %{ 14418 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14419 effect(TEMP scratch); 14420 match(Set dst (VectorCastI2X src)); 14421 format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" 14422 "vpackusdw $dst,$dst\t! convert 4I to 4S vector" %} 14423 ins_encode %{ 14424 int vector_len = 0; 14425 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 14426 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 14427 %} 14428 ins_pipe( pipe_slow ); 14429 %} 14430 14431 instruct vcvt8Ito8S_reg(vecX dst, vecY src, vecY tmp, rRegL scratch) %{ 14432 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14433 effect(TEMP scratch, TEMP tmp); 14434 match(Set dst (VectorCastI2X src)); 14435 format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" 14436 "vextracti128 $tmp,$dst,0x1\n\t" 14437 "vpackusdw $dst,$dst,$tmp\t! convert 8I to 8S vector" %} 14438 ins_encode %{ 14439 int vector_len = 1; 14440 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 14441 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 14442 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 14443 %} 14444 ins_pipe( pipe_slow ); 14445 %} 14446 14447 instruct vcvt16Ito16S_reg(vecY dst, vecZ src) %{ 14448 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14449 match(Set dst (VectorCastI2X src)); 14450 format %{ "evpmovdw $dst,$src\t! convert 16I to 16S vector" %} 14451 ins_encode %{ 14452 int vector_len = 2; 14453 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14454 %} 14455 ins_pipe( pipe_slow ); 14456 %} 14457 14458 instruct vcvt2Ito2L_reg(vecX dst, vecD src) %{ 14459 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14460 match(Set dst (VectorCastI2X src)); 14461 format %{ "vpmovsxdq $dst,$src\t! convert 2I to 2L vector" %} 14462 ins_encode %{ 14463 int vector_len = 0; 14464 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14465 %} 14466 ins_pipe( pipe_slow ); 14467 %} 14468 14469 instruct vcvt4Ito4L_reg(vecY dst, vecX src) %{ 14470 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14471 match(Set dst (VectorCastI2X src)); 14472 format %{ "vpmovsxdq $dst,$src\t! convert 4I to 4L vector" %} 14473 ins_encode %{ 14474 int vector_len = 1; 14475 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14476 %} 14477 ins_pipe( pipe_slow ); 14478 %} 14479 14480 instruct vcvt8Ito8L_reg(vecZ dst, vecY src) %{ 14481 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14482 match(Set dst (VectorCastI2X src)); 14483 format %{ "vpmovsxdq $dst,$src\t! 
instruct vcvt8Ito8L_reg(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastI2X src));
  format %{ "vpmovsxdq $dst,$src\t! convert 8I to 8L vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Ito2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2ps $dst,$src\t! convert 2I to 2F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Ito4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2ps $dst,$src\t! convert 4I to 4F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Ito8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2ps $dst,$src\t! convert 8I to 8F vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt16Ito16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2ps $dst,$src\t! convert 16I to 16F vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Ito2D_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2pd $dst,$src\t! convert 2I to 2D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Ito4D_reg(vecY dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2pd $dst,$src\t! convert 4I to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
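// Reminder on the vector_len argument threaded through all of these rules:
//   vector_len == 0  ->  128-bit (XMM) operation
//   vector_len == 1  ->  256-bit (YMM) operation
//   vector_len == 2  ->  512-bit (ZMM) operation, EVEX-encoded
// Each rule passes the value matching the wider of its source and
// destination operands.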
instruct vcvt8Ito8D_reg(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2pd $dst,$src\t! convert 8I to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Lto4B_reg(vecS dst, vecY src, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastL2X src));
  effect(TEMP scratch);
  format %{ "vpermilps $dst,$src,8\n\t"
            "vpermpd $dst,$dst,8\n\t"
            "vpand $dst,$dst,[0x000000FF000000FF]\n\t"
            "vpackusdw $dst,$dst\n\t"
            "vpackuswb $dst,$dst\t! convert 4L to 4B vector" %}
  ins_encode %{
    int vector_len = 1;
    // vpermilps(8) moves the low dword of each quadword to the bottom of
    // its 128-bit lane; vpermpd(8) then concatenates the two lane results.
    __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len);
    __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len);
    // Since cast to int has been done, do rest of operations in 128.
    vector_len = 0;
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Lto8B_reg(vecD dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastL2X src));
  format %{ "evpmovqb $dst,$src\t! convert 8L to 8B vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Lto2S_reg(vecS dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastL2X src));
  effect(TEMP scratch);
  format %{ "vpshufd $dst,$src,8\n\t"
            "vpand $dst,$dst,[0x0000FFFF0000FFFF]\n\t"
            "vpackusdw $dst,$dst\t! convert 2L to 2S vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Lto4S_reg(vecD dst, vecY src, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastL2X src));
  effect(TEMP scratch);
  format %{ "vpermilps $dst,$src,8\n\t"
            "vpermpd $dst,$dst,8\n\t"
            "vpand $dst,$dst,[0x0000FFFF0000FFFF]\n\t"
            "vpackusdw $dst,$dst\t! convert 4L to 4S vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len);
    __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len);
    // Since cast to int has been done, do rest of operations in 128.
14630 vector_len = 0; 14631 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 14632 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 14633 %} 14634 ins_pipe( pipe_slow ); 14635 %} 14636 14637 instruct vcvt8Lto8S_reg(vecX dst, vecZ src) %{ 14638 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14639 match(Set dst (VectorCastL2X src)); 14640 format %{ "evpmovqw $dst,$src\t! convert 8L to 8S vector" %} 14641 ins_encode %{ 14642 int vector_len = 2; 14643 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14644 %} 14645 ins_pipe( pipe_slow ); 14646 %} 14647 14648 instruct vcvt1Lto1I_reg(vecS dst, vecD src) %{ 14649 predicate(n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14650 match(Set dst (VectorCastL2X src)); 14651 format %{ "movdqu $dst,$src\t! convert 1L to 1I vector" %} 14652 ins_encode %{ 14653 // If register is the same, then move is not needed. 14654 if ($dst$$XMMRegister != $src$$XMMRegister) { 14655 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 14656 } 14657 %} 14658 ins_pipe( pipe_slow ); 14659 %} 14660 14661 instruct vcvt2Lto2I_reg(vecD dst, vecX src) %{ 14662 predicate(UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14663 match(Set dst (VectorCastL2X src)); 14664 format %{ "pshufd $dst,$src,8\t! convert 2L to 2I vector" %} 14665 ins_encode %{ 14666 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 14667 %} 14668 ins_pipe( pipe_slow ); 14669 %} 14670 14671 instruct vcvt2Lto2I_reg_avx(vecD dst, vecX src) %{ 14672 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14673 match(Set dst (VectorCastL2X src)); 14674 format %{ "vpshufd $dst,$src,8\t! convert 2L to 2I vector" %} 14675 ins_encode %{ 14676 int vector_len = 0; 14677 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); 14678 %} 14679 ins_pipe( pipe_slow ); 14680 %} 14681 14682 instruct vcvt4Lto4I_reg(vecX dst, vecY src) %{ 14683 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14684 match(Set dst (VectorCastL2X src)); 14685 format %{ "vpermilps $dst,$src,8\n\t" 14686 "vpermpd $dst,$dst,8\t! convert 4L to 4I vector" %} 14687 ins_encode %{ 14688 int vector_len = 1; 14689 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); 14690 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len); 14691 %} 14692 ins_pipe( pipe_slow ); 14693 %} 14694 14695 instruct vcvt8Lto8I_reg(vecY dst, vecZ src) %{ 14696 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14697 match(Set dst (VectorCastL2X src)); 14698 format %{ "evpmovqd $dst,$src\t! convert 8L to 8I vector" %} 14699 ins_encode %{ 14700 int vector_len = 2; 14701 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14702 %} 14703 ins_pipe( pipe_slow ); 14704 %} 14705 14706 instruct vcvt2Lto2F_reg(vecD dst, vecX src) %{ 14707 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14708 match(Set dst (VectorCastL2X src)); 14709 format %{ "vcvtqq2ps $dst,$src\t! 
convert 2L to 2F vector" %} 14710 ins_encode %{ 14711 int vector_len = 0; 14712 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14713 %} 14714 ins_pipe( pipe_slow ); 14715 %} 14716 14717 instruct vcvt4Lto4F_reg(vecX dst, vecY src) %{ 14718 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14719 match(Set dst (VectorCastL2X src)); 14720 format %{ "vcvtqq2ps $dst,$src\t! convert 4L to 4F vector" %} 14721 ins_encode %{ 14722 int vector_len = 1; 14723 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14724 %} 14725 ins_pipe( pipe_slow ); 14726 %} 14727 14728 instruct vcvt8Lto8F_reg(vecY dst, vecZ src) %{ 14729 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14730 match(Set dst (VectorCastL2X src)); 14731 format %{ "vcvtqq2ps $dst,$src\t! convert 8L to 8F vector" %} 14732 ins_encode %{ 14733 int vector_len = 2; 14734 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14735 %} 14736 ins_pipe( pipe_slow ); 14737 %} 14738 14739 instruct vcvt1Lto1D_reg(vecD dst, vecD src) %{ 14740 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14741 match(Set dst (VectorCastL2X src)); 14742 format %{ "vcvtqq2pd $dst,$src\t! convert 1L to 1D vector" %} 14743 ins_encode %{ 14744 int vector_len = 0; 14745 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14746 %} 14747 ins_pipe( pipe_slow ); 14748 %} 14749 14750 instruct vcvt2Lto2D_reg(vecX dst, vecX src) %{ 14751 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14752 match(Set dst (VectorCastL2X src)); 14753 format %{ "vcvtqq2pd $dst,$src\t! convert 2L to 2D vector" %} 14754 ins_encode %{ 14755 int vector_len = 0; 14756 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14757 %} 14758 ins_pipe( pipe_slow ); 14759 %} 14760 14761 instruct vcvt4Lto4D_reg(vecY dst, vecY src) %{ 14762 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14763 match(Set dst (VectorCastL2X src)); 14764 format %{ "vcvtqq2pd $dst,$src\t! convert 4L to 4D vector" %} 14765 ins_encode %{ 14766 int vector_len = 1; 14767 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14768 %} 14769 ins_pipe( pipe_slow ); 14770 %} 14771 14772 instruct vcvt8Lto8D_reg(vecZ dst, vecZ src) %{ 14773 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14774 match(Set dst (VectorCastL2X src)); 14775 format %{ "vcvtqq2pd $dst,$src\t! convert 8L to 8D vector" %} 14776 ins_encode %{ 14777 int vector_len = 2; 14778 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14779 %} 14780 ins_pipe( pipe_slow ); 14781 %} 14782 14783 instruct vcvt2Fto2D_reg(vecX dst, vecD src) %{ 14784 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14785 match(Set dst (VectorCastF2X src)); 14786 format %{ "vcvtps2pd $dst,$src\t! 
convert 2F to 2D vector" %} 14787 ins_encode %{ 14788 int vector_len = 0; 14789 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14790 %} 14791 ins_pipe( pipe_slow ); 14792 %} 14793 14794 instruct vcvt4Fto4D_reg(vecY dst, vecX src) %{ 14795 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14796 match(Set dst (VectorCastF2X src)); 14797 format %{ "vcvtps2pd $dst,$src\t! convert 4F to 4D vector" %} 14798 ins_encode %{ 14799 int vector_len = 1; 14800 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14801 %} 14802 ins_pipe( pipe_slow ); 14803 %} 14804 14805 instruct vcvt8Fto8D_reg(vecZ dst, vecY src) %{ 14806 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14807 match(Set dst (VectorCastF2X src)); 14808 format %{ "evcvtps2pd $dst,$src\t! convert 8F to 8D vector" %} 14809 ins_encode %{ 14810 int vector_len = 2; 14811 __ evcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14812 %} 14813 ins_pipe( pipe_slow ); 14814 %} 14815 14816 instruct vcvt2Dto2F_reg(vecD dst, vecX src) %{ 14817 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14818 match(Set dst (VectorCastD2X src)); 14819 format %{ "vcvtpd2ps $dst,$src\t! convert 2D to 2F vector" %} 14820 ins_encode %{ 14821 int vector_len = 0; 14822 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14823 %} 14824 ins_pipe( pipe_slow ); 14825 %} 14826 14827 instruct vcvt4Dto4F_reg(vecX dst, vecY src) %{ 14828 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14829 match(Set dst (VectorCastD2X src)); 14830 format %{ "vcvtpd2ps $dst,$src\t! convert 4D to 4F vector" %} 14831 ins_encode %{ 14832 int vector_len = 1; 14833 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14834 %} 14835 ins_pipe( pipe_slow ); 14836 %} 14837 14838 instruct vcvt8Dto8F_reg(vecY dst, vecZ src) %{ 14839 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14840 match(Set dst (VectorCastD2X src)); 14841 format %{ "evcvtpd2ps $dst,$src\t! convert 8D to 8F vector" %} 14842 ins_encode %{ 14843 int vector_len = 2; 14844 __ evcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 14845 %} 14846 ins_pipe( pipe_slow ); 14847 %} 14848 14849 instruct vcmpeq2F(vecD dst, vecD src1, vecD src2) %{ 14850 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 14851 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 14852 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14853 match(Set dst (VectorMaskCmp src1 src2)); 14854 format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed2F" %} 14855 ins_encode %{ 14856 int vector_len = 0; 14857 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 14858 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 14859 %} 14860 ins_pipe( pipe_slow ); 14861 %} 14862 14863 instruct vcmpeq4F(vecX dst, vecX src1, vecX src2) %{ 14864 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 14865 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 14866 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14867 match(Set dst (VectorMaskCmp src1 src2)); 14868 format %{ "vcmpeqps $dst,$src1,$src2\t! 
cmpeq packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpeqps k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
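// Two shapes for the float mask compares: up to 256 bits the vcmpps result
// is itself the lane mask (all ones for true, all zeros for false), while
// the 512-bit rules compare into a k register and then expand it through
// evmovdqul, since C2 does not register-allocate k registers (hence the
// hardcoded k2 temp). A scalar C++ model of an EQ_OQ lane (illustrative
// sketch only, not emitted code):
//
//   for (int i = 0; i < n; i++) {
//     // EQ_OQ is an ordered compare: any NaN operand yields false
//     dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
//   }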
instruct vcmplt2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpltps k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t!
cmpgt packed4F" %} 14991 ins_encode %{ 14992 int vector_len = 0; 14993 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 14994 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 14995 %} 14996 ins_pipe( pipe_slow ); 14997 %} 14998 14999 instruct vcmpgt8F(vecY dst, vecY src1, vecY src2) %{ 15000 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 15001 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 15002 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15003 match(Set dst (VectorMaskCmp src1 src2)); 15004 format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed8F" %} 15005 ins_encode %{ 15006 int vector_len = 1; 15007 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 15008 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15009 %} 15010 ins_pipe( pipe_slow ); 15011 %} 15012 15013 instruct vcmpgt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15014 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 15015 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 15016 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15017 match(Set dst (VectorMaskCmp src1 src2)); 15018 effect(TEMP dst, TEMP scratch); 15019 format %{ "vcmpgtps k2,$src1,$src2\n\t" 15020 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16F" %} 15021 ins_encode %{ 15022 int vector_len = 2; 15023 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 15024 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15025 KRegister mask = k0; // The comparison itself is not being masked. 15026 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15027 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15028 %} 15029 ins_pipe( pipe_slow ); 15030 %} 15031 15032 instruct vcmpge2F(vecD dst, vecD src1, vecD src2) %{ 15033 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15034 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15035 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15036 match(Set dst (VectorMaskCmp src1 src2)); 15037 format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed2F" %} 15038 ins_encode %{ 15039 int vector_len = 0; 15040 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 15041 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15042 %} 15043 ins_pipe( pipe_slow ); 15044 %} 15045 15046 instruct vcmpge4F(vecX dst, vecX src1, vecX src2) %{ 15047 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15048 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15049 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15050 match(Set dst (VectorMaskCmp src1 src2)); 15051 format %{ "vcmpgeps $dst,$src1,$src2\t! 
cmpge packed4F" %} 15052 ins_encode %{ 15053 int vector_len = 0; 15054 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 15055 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15056 %} 15057 ins_pipe( pipe_slow ); 15058 %} 15059 15060 instruct vcmpge8F(vecY dst, vecY src1, vecY src2) %{ 15061 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 15062 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15063 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15064 match(Set dst (VectorMaskCmp src1 src2)); 15065 format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed8F" %} 15066 ins_encode %{ 15067 int vector_len = 1; 15068 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 15069 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15070 %} 15071 ins_pipe( pipe_slow ); 15072 %} 15073 15074 instruct vcmpge16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15075 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 15076 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15077 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15078 match(Set dst (VectorMaskCmp src1 src2)); 15079 effect(TEMP dst, TEMP scratch); 15080 format %{ "vcmpgeps k2,$src1,$src2\n\t" 15081 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16F" %} 15082 ins_encode %{ 15083 int vector_len = 2; 15084 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 15085 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15086 KRegister mask = k0; // The comparison itself is not being masked. 15087 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15088 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15089 %} 15090 ins_pipe( pipe_slow ); 15091 %} 15092 15093 instruct vcmple2F(vecD dst, vecD src1, vecD src2) %{ 15094 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15095 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15096 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15097 match(Set dst (VectorMaskCmp src1 src2)); 15098 format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed2F" %} 15099 ins_encode %{ 15100 int vector_len = 0; 15101 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 15102 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15103 %} 15104 ins_pipe( pipe_slow ); 15105 %} 15106 15107 instruct vcmple4F(vecX dst, vecX src1, vecX src2) %{ 15108 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15109 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15110 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15111 match(Set dst (VectorMaskCmp src1 src2)); 15112 format %{ "vcmpleps $dst,$src1,$src2\t! 
cmple packed4F" %} 15113 ins_encode %{ 15114 int vector_len = 0; 15115 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 15116 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15117 %} 15118 ins_pipe( pipe_slow ); 15119 %} 15120 15121 instruct vcmple8F(vecY dst, vecY src1, vecY src2) %{ 15122 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 15123 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15124 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15125 match(Set dst (VectorMaskCmp src1 src2)); 15126 format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed8F" %} 15127 ins_encode %{ 15128 int vector_len = 1; 15129 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 15130 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15131 %} 15132 ins_pipe( pipe_slow ); 15133 %} 15134 15135 instruct vcmple16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15136 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 15137 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15138 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15139 match(Set dst (VectorMaskCmp src1 src2)); 15140 effect(TEMP dst, TEMP scratch); 15141 format %{ "vcmpleps k2,$src1,$src2\n\t" 15142 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16F" %} 15143 ins_encode %{ 15144 int vector_len = 2; 15145 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 15146 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15147 KRegister mask = k0; // The comparison itself is not being masked. 15148 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15149 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15150 %} 15151 ins_pipe( pipe_slow ); 15152 %} 15153 15154 instruct vcmpne2F(vecD dst, vecD src1, vecD src2) %{ 15155 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15156 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15157 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15158 match(Set dst (VectorMaskCmp src1 src2)); 15159 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed2F" %} 15160 ins_encode %{ 15161 int vector_len = 0; 15162 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 15163 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 15164 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15165 %} 15166 ins_pipe( pipe_slow ); 15167 %} 15168 15169 instruct vcmpne4F(vecX dst, vecX src1, vecX src2) %{ 15170 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15171 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15172 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15173 match(Set dst (VectorMaskCmp src1 src2)); 15174 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed4F" %} 15175 ins_encode %{ 15176 int vector_len = 0; 15177 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
15178 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 15179 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15180 %} 15181 ins_pipe( pipe_slow ); 15182 %} 15183 15184 instruct vcmpne8F(vecY dst, vecY src1, vecY src2) %{ 15185 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 15186 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15187 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15188 match(Set dst (VectorMaskCmp src1 src2)); 15189 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed8F" %} 15190 ins_encode %{ 15191 int vector_len = 1; 15192 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 15193 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 15194 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15195 %} 15196 ins_pipe( pipe_slow ); 15197 %} 15198 15199 instruct vcmpne16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15200 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 15201 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15202 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15203 match(Set dst (VectorMaskCmp src1 src2)); 15204 effect(TEMP dst, TEMP scratch); 15205 format %{ "vcmpneps k2,$src1,$src2\n\t" 15206 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed16F" %} 15207 ins_encode %{ 15208 int vector_len = 2; 15209 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 15210 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 15211 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15212 KRegister mask = k0; // The comparison itself is not being masked. 15213 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15214 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15215 %} 15216 ins_pipe( pipe_slow ); 15217 %} 15218 15219 instruct vcmpeq1D(vecD dst, vecD src1, vecD src2) %{ 15220 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 15221 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15222 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15223 match(Set dst (VectorMaskCmp src1 src2)); 15224 format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed1D" %} 15225 ins_encode %{ 15226 int vector_len = 0; 15227 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 15228 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15229 %} 15230 ins_pipe( pipe_slow ); 15231 %} 15232 15233 instruct vcmpeq2D(vecX dst, vecX src1, vecX src2) %{ 15234 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15235 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15236 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15237 match(Set dst (VectorMaskCmp src1 src2)); 15238 format %{ "vcmpeqpd $dst,$src1,$src2\t! 
cmpeq packed2D" %} 15239 ins_encode %{ 15240 int vector_len = 0; 15241 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 15242 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15243 %} 15244 ins_pipe( pipe_slow ); 15245 %} 15246 15247 instruct vcmpeq4D(vecY dst, vecY src1, vecY src2) %{ 15248 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15249 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15250 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15251 match(Set dst (VectorMaskCmp src1 src2)); 15252 format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed4D" %} 15253 ins_encode %{ 15254 int vector_len = 1; 15255 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 15256 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15257 %} 15258 ins_pipe( pipe_slow ); 15259 %} 15260 15261 instruct vcmpeq8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15262 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 15263 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15264 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15265 match(Set dst (VectorMaskCmp src1 src2)); 15266 effect(TEMP dst, TEMP scratch); 15267 format %{ "vcmpeqpd k2,$src1,$src2\n\t" 15268 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8D" %} 15269 ins_encode %{ 15270 int vector_len = 2; 15271 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 15272 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15273 KRegister mask = k0; // The comparison itself is not being masked. 15274 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15275 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15276 %} 15277 ins_pipe( pipe_slow ); 15278 %} 15279 15280 instruct vcmplt1D(vecD dst, vecD src1, vecD src2) %{ 15281 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 15282 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 15283 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15284 match(Set dst (VectorMaskCmp src1 src2)); 15285 format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed1D" %} 15286 ins_encode %{ 15287 int vector_len = 0; 15288 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 15289 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15290 %} 15291 ins_pipe( pipe_slow ); 15292 %} 15293 15294 instruct vcmplt2D(vecX dst, vecX src1, vecX src2) %{ 15295 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15296 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 15297 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15298 match(Set dst (VectorMaskCmp src1 src2)); 15299 format %{ "vcmpltpd $dst,$src1,$src2\t! 
cmplt packed2D" %} 15300 ins_encode %{ 15301 int vector_len = 0; 15302 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 15303 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15304 %} 15305 ins_pipe( pipe_slow ); 15306 %} 15307 15308 instruct vcmplt4D(vecY dst, vecY src1, vecY src2) %{ 15309 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15310 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 15311 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15312 match(Set dst (VectorMaskCmp src1 src2)); 15313 format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed4D" %} 15314 ins_encode %{ 15315 int vector_len = 1; 15316 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 15317 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15318 %} 15319 ins_pipe( pipe_slow ); 15320 %} 15321 15322 instruct vcmplt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15323 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 15324 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 15325 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15326 match(Set dst (VectorMaskCmp src1 src2)); 15327 effect(TEMP dst, TEMP scratch); 15328 format %{ "vcmpltpd k2,$src1,$src2\n\t" 15329 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8D" %} 15330 ins_encode %{ 15331 int vector_len = 2; 15332 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 15333 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15334 KRegister mask = k0; // The comparison itself is not being masked. 15335 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15336 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15337 %} 15338 ins_pipe( pipe_slow ); 15339 %} 15340 15341 instruct vcmpgt1D(vecD dst, vecD src1, vecD src2) %{ 15342 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 15343 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 15344 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15345 match(Set dst (VectorMaskCmp src1 src2)); 15346 format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed1D" %} 15347 ins_encode %{ 15348 int vector_len = 0; 15349 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 15350 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15351 %} 15352 ins_pipe( pipe_slow ); 15353 %} 15354 15355 instruct vcmpgt2D(vecX dst, vecX src1, vecX src2) %{ 15356 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15357 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 15358 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15359 match(Set dst (VectorMaskCmp src1 src2)); 15360 format %{ "vcmpgtpd $dst,$src1,$src2\t! 
cmpgt packed2D" %} 15361 ins_encode %{ 15362 int vector_len = 0; 15363 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 15364 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15365 %} 15366 ins_pipe( pipe_slow ); 15367 %} 15368 15369 instruct vcmpgt4D(vecY dst, vecY src1, vecY src2) %{ 15370 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15371 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 15372 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15373 match(Set dst (VectorMaskCmp src1 src2)); 15374 format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed4D" %} 15375 ins_encode %{ 15376 int vector_len = 1; 15377 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 15378 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15379 %} 15380 ins_pipe( pipe_slow ); 15381 %} 15382 15383 instruct vcmpgt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15384 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 15385 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 15386 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15387 match(Set dst (VectorMaskCmp src1 src2)); 15388 effect(TEMP dst, TEMP scratch); 15389 format %{ "vcmpgtpd k2,$src1,$src2\n\t" 15390 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8D" %} 15391 ins_encode %{ 15392 int vector_len = 2; 15393 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 15394 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15395 KRegister mask = k0; // The comparison itself is not being masked. 15396 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15397 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15398 %} 15399 ins_pipe( pipe_slow ); 15400 %} 15401 15402 instruct vcmpge1D(vecD dst, vecD src1, vecD src2) %{ 15403 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 15404 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15405 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15406 match(Set dst (VectorMaskCmp src1 src2)); 15407 format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed1D" %} 15408 ins_encode %{ 15409 int vector_len = 0; 15410 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 15411 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15412 %} 15413 ins_pipe( pipe_slow ); 15414 %} 15415 15416 instruct vcmpge2D(vecX dst, vecX src1, vecX src2) %{ 15417 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15418 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15419 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15420 match(Set dst (VectorMaskCmp src1 src2)); 15421 format %{ "vcmpgepd $dst,$src1,$src2\t! 
cmpge packed2D" %} 15422 ins_encode %{ 15423 int vector_len = 0; 15424 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 15425 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15426 %} 15427 ins_pipe( pipe_slow ); 15428 %} 15429 15430 instruct vcmpge4D(vecY dst, vecY src1, vecY src2) %{ 15431 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15432 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15433 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15434 match(Set dst (VectorMaskCmp src1 src2)); 15435 format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed4D" %} 15436 ins_encode %{ 15437 int vector_len = 1; 15438 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 15439 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15440 %} 15441 ins_pipe( pipe_slow ); 15442 %} 15443 15444 instruct vcmpge8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15445 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 15446 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15447 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15448 match(Set dst (VectorMaskCmp src1 src2)); 15449 effect(TEMP dst, TEMP scratch); 15450 format %{ "vcmpgepd k2,$src1,$src2\n\t" 15451 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8D" %} 15452 ins_encode %{ 15453 int vector_len = 2; 15454 Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling 15455 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15456 KRegister mask = k0; // The comparison itself is not being masked. 15457 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15458 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15459 %} 15460 ins_pipe( pipe_slow ); 15461 %} 15462 15463 instruct vcmple1D(vecD dst, vecD src1, vecD src2) %{ 15464 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 15465 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15466 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15467 match(Set dst (VectorMaskCmp src1 src2)); 15468 format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed1D" %} 15469 ins_encode %{ 15470 int vector_len = 0; 15471 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 15472 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15473 %} 15474 ins_pipe( pipe_slow ); 15475 %} 15476 15477 instruct vcmple2D(vecX dst, vecX src1, vecX src2) %{ 15478 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15479 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15480 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15481 match(Set dst (VectorMaskCmp src1 src2)); 15482 format %{ "vcmplepd $dst,$src1,$src2\t! 
cmple packed2D" %} 15483 ins_encode %{ 15484 int vector_len = 0; 15485 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 15486 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15487 %} 15488 ins_pipe( pipe_slow ); 15489 %} 15490 15491 instruct vcmple4D(vecY dst, vecY src1, vecY src2) %{ 15492 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15493 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15494 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15495 match(Set dst (VectorMaskCmp src1 src2)); 15496 format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed4D" %} 15497 ins_encode %{ 15498 int vector_len = 1; 15499 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 15500 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15501 %} 15502 ins_pipe( pipe_slow ); 15503 %} 15504 15505 instruct vcmple8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15506 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 15507 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15508 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15509 match(Set dst (VectorMaskCmp src1 src2)); 15510 effect(TEMP dst, TEMP scratch); 15511 format %{ "vcmplepd k2,$src1,$src2\n\t" 15512 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8D" %} 15513 ins_encode %{ 15514 int vector_len = 2; 15515 Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling 15516 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15517 KRegister mask = k0; // The comparison itself is not being masked. 15518 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15519 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15520 %} 15521 ins_pipe( pipe_slow ); 15522 %} 15523 15524 instruct vcmpne1D(vecD dst, vecD src1, vecD src2) %{ 15525 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 15526 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15527 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15528 match(Set dst (VectorMaskCmp src1 src2)); 15529 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed1D" %} 15530 ins_encode %{ 15531 int vector_len = 0; 15532 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 15533 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 15534 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15535 %} 15536 ins_pipe( pipe_slow ); 15537 %} 15538 15539 instruct vcmpne2D(vecX dst, vecX src1, vecX src2) %{ 15540 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15541 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15542 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15543 match(Set dst (VectorMaskCmp src1 src2)); 15544 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed2D" %} 15545 ins_encode %{ 15546 int vector_len = 0; 15547 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
15548 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 15549 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15550 %} 15551 ins_pipe( pipe_slow ); 15552 %} 15553 15554 instruct vcmpne4D(vecY dst, vecY src1, vecY src2) %{ 15555 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15556 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15557 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15558 match(Set dst (VectorMaskCmp src1 src2)); 15559 format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed4D" %} 15560 ins_encode %{ 15561 int vector_len = 1; 15562 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 15563 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 15564 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15565 %} 15566 ins_pipe( pipe_slow ); 15567 %} 15568 15569 instruct vcmpne8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15570 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 15571 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15572 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15573 match(Set dst (VectorMaskCmp src1 src2)); 15574 effect(TEMP dst, TEMP scratch); 15575 format %{ "vcmpnepd k2,$src1,$src2\n\t" 15576 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed8D" %} 15577 ins_encode %{ 15578 int vector_len = 2; 15579 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 15580 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 15581 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15582 KRegister mask = k0; // The comparison itself is not being masked. 15583 __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15584 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15585 %} 15586 ins_pipe( pipe_slow ); 15587 %} 15588 15589 instruct vcmpeq2I(vecD dst, vecD src1, vecD src2) %{ 15590 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15591 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15592 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15593 match(Set dst (VectorMaskCmp src1 src2)); 15594 format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed2I" %} 15595 ins_encode %{ 15596 int vector_len = 0; 15597 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15598 %} 15599 ins_pipe( pipe_slow ); 15600 %} 15601 15602 instruct vcmpeq4I(vecX dst, vecX src1, vecX src2) %{ 15603 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15604 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15605 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15606 match(Set dst (VectorMaskCmp src1 src2)); 15607 format %{ "vpcmpeqd $dst,$src1,$src2\n\t! 
cmpeq packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
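// SSE/AVX only provide equal and greater-than compares for packed ints, so
// the remaining predicates are synthesized below: lt swaps the operands of
// vpcmpgtd, and ge/le (further down) invert the opposite compare by XORing
// with all ones. A scalar C++ model of the lt form (illustrative sketch
// only, not emitted code):
//
//   for (int i = 0; i < n; i++) {
//     dst[i] = (src2[i] > src1[i]) ? 0xFFFFFFFF : 0;  // i.e. src1 < src2
//   }
//
// The 512-bit rules instead pass the predicate straight to evpcmpd and
// expand the k mask, as in vcmpeq16I above.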
instruct vcmplt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnled k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t!
cmpgt packed16I" %} 15752 ins_encode %{ 15753 int vector_len = 2; 15754 Assembler::ComparisonPredicate cmp = Assembler::nle; 15755 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15756 KRegister mask = k0; // The comparison itself is not being masked. 15757 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15758 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15759 %} 15760 ins_pipe( pipe_slow ); 15761 %} 15762 15763 instruct vcmpge2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 15764 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15765 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15766 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15767 match(Set dst (VectorMaskCmp src1 src2)); 15768 effect(TEMP scratch); 15769 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 15770 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2I" %} 15771 ins_encode %{ 15772 int vector_len = 0; 15773 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 15774 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15775 %} 15776 ins_pipe( pipe_slow ); 15777 %} 15778 15779 instruct vcmpge4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 15780 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15781 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15782 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15783 match(Set dst (VectorMaskCmp src1 src2)); 15784 effect(TEMP scratch); 15785 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 15786 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4I" %} 15787 ins_encode %{ 15788 int vector_len = 0; 15789 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 15790 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15791 %} 15792 ins_pipe( pipe_slow ); 15793 %} 15794 15795 instruct vcmpge8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 15796 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 15797 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15798 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15799 match(Set dst (VectorMaskCmp src1 src2)); 15800 effect(TEMP scratch); 15801 format %{ "vpcmpgtd $dst,$src2,$src1\n\t" 15802 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8I" %} 15803 ins_encode %{ 15804 int vector_len = 1; 15805 __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 15806 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15807 %} 15808 ins_pipe( pipe_slow ); 15809 %} 15810 15811 instruct vcmpge16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15812 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 15813 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 15814 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15815 match(Set dst (VectorMaskCmp src1 src2)); 15816 effect(TEMP dst, TEMP scratch); 15817 format %{ "vpcmpnltd k2,$src1,$src2\n\t" 15818 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16I" %} 15819 ins_encode %{ 15820 int vector_len = 2; 15821 Assembler::ComparisonPredicate cmp = Assembler::nlt; 15822 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
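    // Note: the allocator has no register class for AVX-512 opmask (k)
    // registers, hence the pinned k2 temp above; TEMP dst in the effect()
    // clause keeps dst disjoint from the sources while k2 is clobbered
    // behind the allocator's back.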
15823 KRegister mask = k0; // The comparison itself is not being masked. 15824 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15825 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15826 %} 15827 ins_pipe( pipe_slow ); 15828 %} 15829 15830 instruct vcmple2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 15831 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15832 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15833 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15834 match(Set dst (VectorMaskCmp src1 src2)); 15835 effect(TEMP scratch); 15836 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 15837 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2I" %} 15838 ins_encode %{ 15839 int vector_len = 0; 15840 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15841 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15842 %} 15843 ins_pipe( pipe_slow ); 15844 %} 15845 15846 instruct vcmple4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 15847 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15848 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15849 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15850 match(Set dst (VectorMaskCmp src1 src2)); 15851 effect(TEMP scratch); 15852 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 15853 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4I" %} 15854 ins_encode %{ 15855 int vector_len = 0; 15856 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15857 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15858 %} 15859 ins_pipe( pipe_slow ); 15860 %} 15861 15862 instruct vcmple8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 15863 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 15864 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15865 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15866 match(Set dst (VectorMaskCmp src1 src2)); 15867 effect(TEMP scratch); 15868 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 15869 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8I" %} 15870 ins_encode %{ 15871 int vector_len = 1; 15872 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15873 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15874 %} 15875 ins_pipe( pipe_slow ); 15876 %} 15877 15878 instruct vcmple16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15879 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 15880 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 15881 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15882 match(Set dst (VectorMaskCmp src1 src2)); 15883 effect(TEMP dst, TEMP scratch); 15884 format %{ "vpcmpled k2,$src1,$src2\n\t" 15885 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16I" %} 15886 ins_encode %{ 15887 int vector_len = 2; 15888 Assembler::ComparisonPredicate cmp = Assembler::le; 15889 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15890 KRegister mask = k0; // The comparison itself is not being masked. 
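    // The two EVEX instructions below materialize the boolean vector in two
    // steps: evpcmpd writes one result bit per lane into ktmp, then the
    // zero-masked load from vector_all_bits_set() copies -1 into each lane
    // whose mask bit is set and zeroes the rest, matching the all-ones/
    // all-zeroes lanes the narrower AVX forms produce directly.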
15891 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15892 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15893 %} 15894 ins_pipe( pipe_slow ); 15895 %} 15896 15897 instruct vcmpne2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 15898 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 15899 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15900 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15901 match(Set dst (VectorMaskCmp src1 src2)); 15902 effect(TEMP scratch); 15903 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 15904 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2I" %} 15905 ins_encode %{ 15906 int vector_len = 0; 15907 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15908 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15909 %} 15910 ins_pipe( pipe_slow ); 15911 %} 15912 15913 instruct vcmpne4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 15914 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 15915 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15916 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15917 match(Set dst (VectorMaskCmp src1 src2)); 15918 effect(TEMP scratch); 15919 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 15920 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4I" %} 15921 ins_encode %{ 15922 int vector_len = 0; 15923 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15924 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15925 %} 15926 ins_pipe( pipe_slow ); 15927 %} 15928 15929 instruct vcmpne8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 15930 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 15931 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15932 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15933 match(Set dst (VectorMaskCmp src1 src2)); 15934 effect(TEMP scratch); 15935 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 15936 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8I" %} 15937 ins_encode %{ 15938 int vector_len = 1; 15939 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15940 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 15941 %} 15942 ins_pipe( pipe_slow ); 15943 %} 15944 15945 instruct vcmpne16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 15946 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 15947 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 15948 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 15949 match(Set dst (VectorMaskCmp src1 src2)); 15950 effect(TEMP dst, TEMP scratch); 15951 format %{ "vpcmpneqd k2,$src1,$src2\n\t" 15952 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed16I" %} 15953 ins_encode %{ 15954 int vector_len = 2; 15955 Assembler::ComparisonPredicate cmp = Assembler::neq; 15956 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 15957 KRegister mask = k0; // The comparison itself is not being masked. 
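    // Assembler::neq maps to the imm8 predicate field of EVEX vpcmpd
    // (eq=0, lt=1, le=2, neq=4, nlt=5, nle=6 in the Intel encoding), so
    // ne needs no separate inversion here, unlike the vpcmpeqd+vpxor
    // pairs used for the 64/128/256-bit vectors above.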
15958 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 15959 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 15960 %} 15961 ins_pipe( pipe_slow ); 15962 %} 15963 15964 instruct vcmpeq8B(vecD dst, vecD src1, vecD src2) %{ 15965 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 15966 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15967 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15968 match(Set dst (VectorMaskCmp src1 src2)); 15969 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed8B" %} 15970 ins_encode %{ 15971 int vector_len = 0; 15972 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15973 %} 15974 ins_pipe( pipe_slow ); 15975 %} 15976 15977 instruct vcmpeq16B(vecX dst, vecX src1, vecX src2) %{ 15978 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 15979 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15980 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15981 match(Set dst (VectorMaskCmp src1 src2)); 15982 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed16B" %} 15983 ins_encode %{ 15984 int vector_len = 0; 15985 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15986 %} 15987 ins_pipe( pipe_slow ); 15988 %} 15989 15990 instruct vcmpeq32B(vecY dst, vecY src1, vecY src2) %{ 15991 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 15992 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 15993 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15994 match(Set dst (VectorMaskCmp src1 src2)); 15995 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed32B" %} 15996 ins_encode %{ 15997 int vector_len = 1; 15998 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15999 %} 16000 ins_pipe( pipe_slow ); 16001 %} 16002 16003 instruct vcmpeq64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16004 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 16005 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16006 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16007 match(Set dst (VectorMaskCmp src1 src2)); 16008 effect(TEMP dst, TEMP scratch); 16009 format %{ "vpcmpeqb k2,$src1,$src2\n\t" 16010 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed64B" %} 16011 ins_encode %{ 16012 int vector_len = 2; 16013 Assembler::ComparisonPredicate cmp = Assembler::eq; 16014 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16015 KRegister mask = k0; // The comparison itself is not being masked. 16016 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16017 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16018 %} 16019 ins_pipe( pipe_slow ); 16020 %} 16021 16022 instruct vcmplt8B(vecD dst, vecD src1, vecD src2) %{ 16023 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16024 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 16025 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16026 match(Set dst (VectorMaskCmp src1 src2)); 16027 format %{ "vpcmpgtb $dst,$src2,$src1\t! 
cmplt packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16B(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt32B(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8B(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16B(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src1,$src2\t! 
cmpgt packed16B" %} 16099 ins_encode %{ 16100 int vector_len = 0; 16101 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16102 %} 16103 ins_pipe( pipe_slow ); 16104 %} 16105 16106 instruct vcmpgt32B(vecY dst, vecY src1, vecY src2) %{ 16107 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 16108 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 16109 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16110 match(Set dst (VectorMaskCmp src1 src2)); 16111 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed32B" %} 16112 ins_encode %{ 16113 int vector_len = 1; 16114 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16115 %} 16116 ins_pipe( pipe_slow ); 16117 %} 16118 16119 instruct vcmpgt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16120 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 16121 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 16122 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16123 match(Set dst (VectorMaskCmp src1 src2)); 16124 effect(TEMP dst, TEMP scratch); 16125 format %{ "vpcmpnleb k2,$src1,$src2\n\t" 16126 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed64B" %} 16127 ins_encode %{ 16128 int vector_len = 2; 16129 Assembler::ComparisonPredicate cmp = Assembler::nle; 16130 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16131 KRegister mask = k0; // The comparison itself is not being masked. 16132 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16133 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16134 %} 16135 ins_pipe( pipe_slow ); 16136 %} 16137 16138 instruct vcmpge8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 16139 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16140 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16141 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16142 match(Set dst (VectorMaskCmp src1 src2)); 16143 effect(TEMP scratch); 16144 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 16145 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8B" %} 16146 ins_encode %{ 16147 int vector_len = 0; 16148 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16149 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16150 %} 16151 ins_pipe( pipe_slow ); 16152 %} 16153 16154 instruct vcmpge16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 16155 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 16156 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16157 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16158 match(Set dst (VectorMaskCmp src1 src2)); 16159 effect(TEMP scratch); 16160 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 16161 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed16B" %} 16162 ins_encode %{ 16163 int vector_len = 0; 16164 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16165 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16166 %} 16167 ins_pipe( pipe_slow ); 16168 %} 16169 16170 instruct vcmpge32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 16171 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 16172 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16173 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16174 match(Set dst (VectorMaskCmp src1 src2)); 16175 effect(TEMP scratch); 16176 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 16177 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed32B" %} 16178 ins_encode %{ 16179 int vector_len = 1; 16180 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16181 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16182 %} 16183 ins_pipe( pipe_slow ); 16184 %} 16185 16186 instruct vcmpge64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16187 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 16188 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16189 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16190 match(Set dst (VectorMaskCmp src1 src2)); 16191 effect(TEMP dst, TEMP scratch); 16192 format %{ "vpcmpnltb k2,$src1,$src2\n\t" 16193 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed64B" %} 16194 ins_encode %{ 16195 int vector_len = 2; 16196 Assembler::ComparisonPredicate cmp = Assembler::nlt; 16197 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16198 KRegister mask = k0; // The comparison itself is not being masked. 16199 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16200 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16201 %} 16202 ins_pipe( pipe_slow ); 16203 %} 16204 16205 instruct vcmple8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 16206 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16207 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16208 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16209 match(Set dst (VectorMaskCmp src1 src2)); 16210 effect(TEMP scratch); 16211 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 16212 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8B" %} 16213 ins_encode %{ 16214 int vector_len = 0; 16215 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16216 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16217 %} 16218 ins_pipe( pipe_slow ); 16219 %} 16220 16221 instruct vcmple16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 16222 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 16223 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16224 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16225 match(Set dst (VectorMaskCmp src1 src2)); 16226 effect(TEMP scratch); 16227 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 16228 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed16B" %} 16229 ins_encode %{ 16230 int vector_len = 0; 16231 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16232 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16233 %} 16234 ins_pipe( pipe_slow ); 16235 %} 16236 16237 instruct vcmple32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 16238 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 16239 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16240 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16241 match(Set dst (VectorMaskCmp src1 src2)); 16242 effect(TEMP scratch); 16243 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 16244 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed32B" %} 16245 ins_encode %{ 16246 int vector_len = 1; 16247 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16248 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16249 %} 16250 ins_pipe( pipe_slow ); 16251 %} 16252 16253 instruct vcmple64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16254 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 16255 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16256 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16257 match(Set dst (VectorMaskCmp src1 src2)); 16258 effect(TEMP dst, TEMP scratch); 16259 format %{ "vpcmpleb k2,$src1,$src2\n\t" 16260 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed64B" %} 16261 ins_encode %{ 16262 int vector_len = 2; 16263 Assembler::ComparisonPredicate cmp = Assembler::le; 16264 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16265 KRegister mask = k0; // The comparison itself is not being masked. 16266 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16267 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16268 %} 16269 ins_pipe( pipe_slow ); 16270 %} 16271 16272 instruct vcmpne8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 16273 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16274 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 16275 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16276 match(Set dst (VectorMaskCmp src1 src2)); 16277 effect(TEMP scratch); 16278 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 16279 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8B" %} 16280 ins_encode %{ 16281 int vector_len = 0; 16282 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16283 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16284 %} 16285 ins_pipe( pipe_slow ); 16286 %} 16287 16288 instruct vcmpne16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 16289 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 16290 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 16291 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16292 match(Set dst (VectorMaskCmp src1 src2)); 16293 effect(TEMP scratch); 16294 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 16295 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed16B" %} 16296 ins_encode %{ 16297 int vector_len = 0; 16298 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16299 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16300 %} 16301 ins_pipe( pipe_slow ); 16302 %} 16303 16304 instruct vcmpne32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 16305 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 16306 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 16307 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16308 match(Set dst (VectorMaskCmp src1 src2)); 16309 effect(TEMP scratch); 16310 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 16311 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed32B" %} 16312 ins_encode %{ 16313 int vector_len = 1; 16314 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16315 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16316 %} 16317 ins_pipe( pipe_slow ); 16318 %} 16319 16320 instruct vcmpne64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16321 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 16322 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 16323 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 16324 match(Set dst (VectorMaskCmp src1 src2)); 16325 effect(TEMP dst, TEMP scratch); 16326 format %{ "vpcmpneqb k2,$src1,$src2\n\t" 16327 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed64B" %} 16328 ins_encode %{ 16329 int vector_len = 2; 16330 Assembler::ComparisonPredicate cmp = Assembler::neq; 16331 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16332 KRegister mask = k0; // The comparison itself is not being masked. 16333 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16334 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16335 %} 16336 ins_pipe( pipe_slow ); 16337 %} 16338 16339 instruct vcmpeq4S(vecD dst, vecD src1, vecD src2) %{ 16340 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 16341 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16342 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16343 match(Set dst (VectorMaskCmp src1 src2)); 16344 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed4S" %} 16345 ins_encode %{ 16346 int vector_len = 0; 16347 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16348 %} 16349 ins_pipe( pipe_slow ); 16350 %} 16351 16352 instruct vcmpeq8S(vecX dst, vecX src1, vecX src2) %{ 16353 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16354 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16355 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16356 match(Set dst (VectorMaskCmp src1 src2)); 16357 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! 
cmpeq packed8S" %} 16358 ins_encode %{ 16359 int vector_len = 0; 16360 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16361 %} 16362 ins_pipe( pipe_slow ); 16363 %} 16364 16365 instruct vcmpeq16S(vecY dst, vecY src1, vecY src2) %{ 16366 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 16367 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16368 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16369 match(Set dst (VectorMaskCmp src1 src2)); 16370 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed16S" %} 16371 ins_encode %{ 16372 int vector_len = 1; 16373 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16374 %} 16375 ins_pipe( pipe_slow ); 16376 %} 16377 16378 instruct vcmpeq32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16379 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 16380 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16381 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16382 match(Set dst (VectorMaskCmp src1 src2)); 16383 effect(TEMP dst, TEMP scratch); 16384 format %{ "vpcmpeqw k2,$src1,$src2\n\t" 16385 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed32S" %} 16386 ins_encode %{ 16387 int vector_len = 2; 16388 Assembler::ComparisonPredicate cmp = Assembler::eq; 16389 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16390 KRegister mask = k0; // The comparison itself is not being masked. 16391 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16392 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16393 %} 16394 ins_pipe( pipe_slow ); 16395 %} 16396 16397 instruct vcmplt4S(vecD dst, vecD src1, vecD src2) %{ 16398 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 16399 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 16400 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16401 match(Set dst (VectorMaskCmp src1 src2)); 16402 format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed4S" %} 16403 ins_encode %{ 16404 int vector_len = 0; 16405 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16406 %} 16407 ins_pipe( pipe_slow ); 16408 %} 16409 16410 instruct vcmplt8S(vecX dst, vecX src1, vecX src2) %{ 16411 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16412 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 16413 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16414 match(Set dst (VectorMaskCmp src1 src2)); 16415 format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed8S" %} 16416 ins_encode %{ 16417 int vector_len = 0; 16418 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16419 %} 16420 ins_pipe( pipe_slow ); 16421 %} 16422 16423 instruct vcmplt16S(vecY dst, vecY src1, vecY src2) %{ 16424 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 16425 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 16426 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16427 match(Set dst (VectorMaskCmp src1 src2)); 16428 format %{ "vpcmpgtw $dst,$src2,$src1\t! 
cmplt packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnlew k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpgt packed32S" %} 16502 ins_encode %{ 16503 int vector_len = 2; 16504 Assembler::ComparisonPredicate cmp = Assembler::nle; 16505 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16506 KRegister mask = k0; // The comparison itself is not being masked. 16507 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16508 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16509 %} 16510 ins_pipe( pipe_slow ); 16511 %} 16512 16513 instruct vcmpge4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 16514 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 16515 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16516 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16517 match(Set dst (VectorMaskCmp src1 src2)); 16518 effect(TEMP scratch); 16519 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 16520 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4S" %} 16521 ins_encode %{ 16522 int vector_len = 0; 16523 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16524 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16525 %} 16526 ins_pipe( pipe_slow ); 16527 %} 16528 16529 instruct vcmpge8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 16530 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16531 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16532 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16533 match(Set dst (VectorMaskCmp src1 src2)); 16534 effect(TEMP scratch); 16535 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 16536 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8S" %} 16537 ins_encode %{ 16538 int vector_len = 0; 16539 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16540 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16541 %} 16542 ins_pipe( pipe_slow ); 16543 %} 16544 16545 instruct vcmpge16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 16546 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 16547 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16548 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16549 match(Set dst (VectorMaskCmp src1 src2)); 16550 effect(TEMP scratch); 16551 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 16552 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16S" %} 16553 ins_encode %{ 16554 int vector_len = 1; 16555 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16556 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16557 %} 16558 ins_pipe( pipe_slow ); 16559 %} 16560 16561 instruct vcmpge32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16562 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 16563 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16564 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16565 match(Set dst (VectorMaskCmp src1 src2)); 16566 effect(TEMP dst, TEMP scratch); 16567 format %{ "vpcmpnltw k2,$src1,$src2\n\t" 16568 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed32S" %} 16569 ins_encode %{ 16570 int vector_len = 2; 16571 Assembler::ComparisonPredicate cmp = Assembler::nlt; 16572 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
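    // Note: nlt ("not less than") is the EVEX spelling of ge, so this
    // 512-bit rule gets cmpge in a single compare, where the AVX rules
    // above synthesize it as gt with swapped operands followed by a
    // vpxor against all ones.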
16573 KRegister mask = k0; // The comparison itself is not being masked. 16574 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16575 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16576 %} 16577 ins_pipe( pipe_slow ); 16578 %} 16579 16580 instruct vcmple4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 16581 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 16582 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16583 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16584 match(Set dst (VectorMaskCmp src1 src2)); 16585 effect(TEMP scratch); 16586 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 16587 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4S" %} 16588 ins_encode %{ 16589 int vector_len = 0; 16590 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16591 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16592 %} 16593 ins_pipe( pipe_slow ); 16594 %} 16595 16596 instruct vcmple8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 16597 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16598 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16599 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16600 match(Set dst (VectorMaskCmp src1 src2)); 16601 effect(TEMP scratch); 16602 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 16603 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8S" %} 16604 ins_encode %{ 16605 int vector_len = 0; 16606 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16607 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16608 %} 16609 ins_pipe( pipe_slow ); 16610 %} 16611 16612 instruct vcmple16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 16613 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 16614 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16615 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16616 match(Set dst (VectorMaskCmp src1 src2)); 16617 effect(TEMP scratch); 16618 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 16619 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16S" %} 16620 ins_encode %{ 16621 int vector_len = 1; 16622 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16623 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16624 %} 16625 ins_pipe( pipe_slow ); 16626 %} 16627 16628 instruct vcmple32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16629 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 16630 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16631 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16632 match(Set dst (VectorMaskCmp src1 src2)); 16633 effect(TEMP dst, TEMP scratch); 16634 format %{ "vpcmplew k2,$src1,$src2\n\t" 16635 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed32S" %} 16636 ins_encode %{ 16637 int vector_len = 2; 16638 Assembler::ComparisonPredicate cmp = Assembler::le; 16639 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16640 KRegister mask = k0; // The comparison itself is not being masked. 
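    // The rRegL scratch operand exists only so the masked load below can
    // materialize the address of the vector_all_bits_set() constant when
    // it is not RIP-reachable; the comparison itself needs no
    // general-purpose register.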
16641 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16642 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16643 %} 16644 ins_pipe( pipe_slow ); 16645 %} 16646 16647 instruct vcmpne4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 16648 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 16649 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 16650 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16651 match(Set dst (VectorMaskCmp src1 src2)); 16652 effect(TEMP scratch); 16653 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 16654 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4S" %} 16655 ins_encode %{ 16656 int vector_len = 0; 16657 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16658 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16659 %} 16660 ins_pipe( pipe_slow ); 16661 %} 16662 16663 instruct vcmpne8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 16664 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 16665 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 16666 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16667 match(Set dst (VectorMaskCmp src1 src2)); 16668 effect(TEMP scratch); 16669 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 16670 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8S" %} 16671 ins_encode %{ 16672 int vector_len = 0; 16673 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16674 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16675 %} 16676 ins_pipe( pipe_slow ); 16677 %} 16678 16679 instruct vcmpne16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 16680 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 16681 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 16682 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16683 match(Set dst (VectorMaskCmp src1 src2)); 16684 effect(TEMP scratch); 16685 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 16686 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16S" %} 16687 ins_encode %{ 16688 int vector_len = 1; 16689 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16690 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16691 %} 16692 ins_pipe( pipe_slow ); 16693 %} 16694 16695 instruct vcmpne32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16696 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 16697 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 16698 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 16699 match(Set dst (VectorMaskCmp src1 src2)); 16700 effect(TEMP dst, TEMP scratch); 16701 format %{ "vpcmpneqw k2,$src1,$src2\n\t" 16702 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed32S" %} 16703 ins_encode %{ 16704 int vector_len = 2; 16705 Assembler::ComparisonPredicate cmp = Assembler::neq; 16706 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16707 KRegister mask = k0; // The comparison itself is not being masked. 
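    // Same shape as the other 512-bit rules: compare into a k register,
    // then expand the mask to lane-wide booleans. Only the predicate
    // (Assembler::neq) differs, which spares these EVEX patterns the
    // extra vpxor inversion the 128/256-bit ne rules pay.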
16708 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16709 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16710 %} 16711 ins_pipe( pipe_slow ); 16712 %} 16713 16714 instruct vcmpeq1L(vecD dst, vecD src1, vecD src2) %{ 16715 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 16716 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16717 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16718 match(Set dst (VectorMaskCmp src1 src2)); 16719 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed1L" %} 16720 ins_encode %{ 16721 int vector_len = 0; 16722 __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16723 %} 16724 ins_pipe( pipe_slow ); 16725 %} 16726 16727 instruct vcmpeq2L(vecX dst, vecX src1, vecX src2) %{ 16728 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 16729 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16730 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16731 match(Set dst (VectorMaskCmp src1 src2)); 16732 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed2L" %} 16733 ins_encode %{ 16734 int vector_len = 0; 16735 __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16736 %} 16737 ins_pipe( pipe_slow ); 16738 %} 16739 16740 instruct vcmpeq4L(vecY dst, vecY src1, vecY src2) %{ 16741 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 16742 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16743 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16744 match(Set dst (VectorMaskCmp src1 src2)); 16745 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed4L" %} 16746 ins_encode %{ 16747 int vector_len = 1; 16748 __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16749 %} 16750 ins_pipe( pipe_slow ); 16751 %} 16752 16753 instruct vcmpeq8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16754 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 16755 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 16756 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16757 match(Set dst (VectorMaskCmp src1 src2)); 16758 effect(TEMP dst, TEMP scratch); 16759 format %{ "vpcmpeqq k2,$src1,$src2\n\t" 16760 "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8L" %} 16761 ins_encode %{ 16762 int vector_len = 2; 16763 Assembler::ComparisonPredicate cmp = Assembler::eq; 16764 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16765 KRegister mask = k0; // The comparison itself is not being masked. 16766 __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16767 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16768 %} 16769 ins_pipe( pipe_slow ); 16770 %} 16771 16772 instruct vcmplt1L(vecD dst, vecD src1, vecD src2) %{ 16773 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 16774 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 16775 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16776 match(Set dst (VectorMaskCmp src1 src2)); 16777 format %{ "vpcmpgtq $dst,$src2,$src1\t! 
cmplt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! 
cmpgt packed2L" %} 16849 ins_encode %{ 16850 int vector_len = 0; 16851 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16852 %} 16853 ins_pipe( pipe_slow ); 16854 %} 16855 16856 instruct vcmpgt4L(vecY dst, vecY src1, vecY src2) %{ 16857 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 16858 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 16859 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16860 match(Set dst (VectorMaskCmp src1 src2)); 16861 format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed4L" %} 16862 ins_encode %{ 16863 int vector_len = 1; 16864 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16865 %} 16866 ins_pipe( pipe_slow ); 16867 %} 16868 16869 instruct vcmpgt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16870 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 16871 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 16872 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16873 match(Set dst (VectorMaskCmp src1 src2)); 16874 effect(TEMP dst, TEMP scratch); 16875 format %{ "vpcmpnleq k2,$src1,$src2\n\t" 16876 "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8L" %} 16877 ins_encode %{ 16878 int vector_len = 2; 16879 Assembler::ComparisonPredicate cmp = Assembler::nle; 16880 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16881 KRegister mask = k0; // The comparison itself is not being masked. 16882 __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16883 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16884 %} 16885 ins_pipe( pipe_slow ); 16886 %} 16887 16888 instruct vcmpge1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 16889 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 16890 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16891 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16892 match(Set dst (VectorMaskCmp src1 src2)); 16893 effect(TEMP scratch); 16894 format %{ "vpcmpgtq $dst,$src2,$src1\n\t" 16895 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed1L" %} 16896 ins_encode %{ 16897 int vector_len = 0; 16898 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16899 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16900 %} 16901 ins_pipe( pipe_slow ); 16902 %} 16903 16904 instruct vcmpge2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 16905 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 16906 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16907 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16908 match(Set dst (VectorMaskCmp src1 src2)); 16909 effect(TEMP scratch); 16910 format %{ "vpcmpgtq $dst,$src2,$src1\n\t" 16911 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed2L" %} 16912 ins_encode %{ 16913 int vector_len = 0; 16914 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16915 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16916 %} 16917 ins_pipe( pipe_slow ); 16918 %} 16919 16920 instruct vcmpge4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 16921 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 16922 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16923 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16924 match(Set dst (VectorMaskCmp src1 src2)); 16925 effect(TEMP scratch); 16926 format %{ "vpcmpgtq $dst,$src2,$src1\n\t" 16927 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4L" %} 16928 ins_encode %{ 16929 int vector_len = 1; 16930 __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 16931 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16932 %} 16933 ins_pipe( pipe_slow ); 16934 %} 16935 16936 instruct vcmpge8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 16937 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 16938 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 16939 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16940 match(Set dst (VectorMaskCmp src1 src2)); 16941 effect(TEMP dst, TEMP scratch); 16942 format %{ "vpcmpnltq k2,$src1,$src2\n\t" 16943 "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8L" %} 16944 ins_encode %{ 16945 int vector_len = 2; 16946 Assembler::ComparisonPredicate cmp = Assembler::nlt; 16947 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 16948 KRegister mask = k0; // The comparison itself is not being masked. 16949 __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 16950 __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 16951 %} 16952 ins_pipe( pipe_slow ); 16953 %} 16954 16955 instruct vcmple1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 16956 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 16957 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16958 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16959 match(Set dst (VectorMaskCmp src1 src2)); 16960 effect(TEMP scratch); 16961 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 16962 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed1L" %} 16963 ins_encode %{ 16964 int vector_len = 0; 16965 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 16966 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 16967 %} 16968 ins_pipe( pipe_slow ); 16969 %} 16970 16971 instruct vcmple2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 16972 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 16973 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 16974 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16975 match(Set dst (VectorMaskCmp src1 src2)); 16976 effect(TEMP scratch); 16977 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 16978 "vpxor $dst,$dst,0xFFFFFFFF\t! 
instruct vcmple1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq  $dst,$src1,$src2\n\t"
            "vpxor     $dst,$dst,0xFFFFFFFF\t! cmple packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq  $dst,$src1,$src2\n\t"
            "vpxor     $dst,$dst,0xFFFFFFFF\t! cmple packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq  $dst,$src1,$src2\n\t"
            "vpxor     $dst,$dst,0xFFFFFFFF\t! cmple packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleq  k2,$src1,$src2\n\t"
            "vmovdqu64 $dst,k2{z},0xFFFFFFFFFF\t! cmple packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmpne1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq  $dst,$src1,$src2\n\t"
            "vpxor     $dst,$dst,0xFFFFFFFF\t! cmpneq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq  $dst,$src1,$src2\n\t"
            "vpxor     $dst,$dst,0xFFFFFFFF\t! cmpneq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq  $dst,$src1,$src2\n\t"
            "vpxor     $dst,$dst,0xFFFFFFFF\t! cmpneq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst,k2{z},0xFFFFFFFFFF\t! cmpneq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
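// Blend (VectorBlend) rules below cover three ISA generations:
//  - SSE4.1 blendvps/blendvpd/pblendvb take the mask implicitly in xmm0,
//    which is why the no-AVX rules carry an rxmm0 operand and blend in place.
//  - AVX vblendv*/vpblendvb take the mask as an explicit fourth register.
//  - 512-bit vectors have no variable-blend instruction, so the mask vector
//    is first compared against all-ones into a k-register and then
//    vblendm*/vpblendm* select lanes under that k-mask.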
instruct blendvps2F(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvps  $dst,$src,$mask\t! packed2F" %}
  ins_encode %{
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps2F(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvps4F(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvps  $dst,$src,$mask\t! packed4F" %}
  ins_encode %{
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps4F(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps8F(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps16F(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd  k2,$mask,0xFFFFFFFF\n\t"
            "vblendmps $dst,k2,$src1,$src2\t! blend packed16F" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpeqd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
    __ evblendmps($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd8D(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "evpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vblendmpd $dst,k2,$src1,$src2\t! blend packed8D" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evblendmpd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmb64B(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqb  k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmb $dst,k2,$src1,$src2\t! blend packed64B" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpb(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmb($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vpblendmw32S(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT && VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqw  k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmw $dst,k2,$src1,$src2\t! blend packed32S" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpw(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmw($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmd16I(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd  k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmd $dst,k2,$src1,$src2\t! blend packed16I" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmq8L(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq  k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmq $dst,k2,$src1,$src2\t! blend packed8L" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmq($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb2I(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed2I" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vpblendvb2I(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4I(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed4I" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4I(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8I(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb8B(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed8B" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8B(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb16B(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed16B" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vpblendvb16B(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb32B(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4S(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed4S" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4S(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb8S(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed8S" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8S(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb16S(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct pblendvb1L(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed1L" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb1L(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb2L(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb  $dst,$src,$mask\t! blend packed2L" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb2L(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4L(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvpd1D(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvpd  $dst,$src,$mask\t! packed1D" %}
  ins_encode %{
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd1D(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed1D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct blendvpd2D(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvpd  $dst,$src,$mask\t! packed2D" %}
  ins_encode %{
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd2D(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd4D(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- NEG --------------------------------------
// a = -a
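// Note: x86 has no packed integer negate instruction, so the NegVI rules
// below compute 0 - src instead: zero a register with a self-xor, then
// psubd/vpsubd the source from it.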
instruct vneg2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 2);
  match(Set dst (NegVI src));
  effect(TEMP dst);
  format %{ "pxor   $dst,$dst\n\t"
            "psubd  $dst,$src\t! neg packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 4);
  match(Set dst (NegVI src));
  effect(TEMP dst);
  format %{ "pxor   $dst,$dst\n\t"
            "psubd  $dst,$src\t! neg packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8I_reg(vecY dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (NegVI src));
  effect(TEMP tmp);
  format %{ "vpxor  $tmp,$tmp,$tmp\n\t"
            "vpsubd $dst,$tmp,$src\t! neg packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg16I_reg(vecZ dst, vecZ src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (NegVI src));
  effect(TEMP tmp);
  format %{ "vpxor  $tmp,$tmp,$tmp\n\t"
            "vpsubd $dst,$tmp,$src\t! neg packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
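// Floating-point negation only needs the sign bit flipped: xorps/xorpd the
// lanes with a 0x80000000 / 0x8000000000000000 mask taken from the constant
// tables (double_signflip, vector_float_signflip, vector_double_signflip).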
instruct vneg1D(regD dst) %{
  predicate((UseSSE >= 2) && (UseAVX == 0));
  match(Set dst (NegVD dst));
  ins_cost(150);
  format %{ "xorpd  $dst,[0x8000000000000000]\t# $dst = -$dst neg packed1D" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg1D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1);
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed1D" %}
  ins_cost(150);
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg2D_reg(vecX dst) %{
  predicate((UseSSE >= 2));
  match(Set dst (NegVD dst));
  ins_cost(150);
  format %{ "xorpd  $dst,[0x8000000000000000]\t# $dst = -$dst neg packed2D" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(vector_double_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg2F_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (NegVF dst));
  format %{ "xorps  $dst,[0x80000000]\t# $dst = -$dst neg packed2F" %}
  ins_cost(150);
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (NegVF dst));
  format %{ "xorps  $dst,[0x80000000]\t# $dst = -$dst neg packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (NegVF src));
  format %{ "vxorps $dst,$src\t# $dst = -$src neg packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (NegVF src));
  format %{ "vxorps $dst,$src\t# $dst = -$src neg packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
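// Integer abs maps onto pabs{b,w,d} (SSSE3) and their AVX forms; 64-bit lane
// abs (vpabsq) exists only in AVX-512, hence UseAVX > 2 on the packed-L rules.
// Floating-point abs clears the sign bit by and-ing with a 0x7FFF... mask
// (vector_float_signmask / vector_double_signmask).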
instruct vabs8B_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "pabsb  $dst,$src\t# $dst = |$src| abs packed8B" %}
  ins_cost(150);
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16B_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "pabsb  $dst,$src\t# $dst = |$src| abs packed16B" %}
  ins_cost(150);
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32B_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs64B_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "evpabsb $dst,$src\t# $dst = |$src| abs packed64B" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4S_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "pabsw  $dst,$src\t# $dst = |$src| abs packed4S" %}
  ins_cost(150);
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8S_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "pabsw  $dst,$src\t# $dst = |$src| abs packed8S" %}
  ins_cost(150);
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16S_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32S_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "evpabsw $dst,$src\t# $dst = |$src| abs packed32S" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "pabsd  $dst,$src\t# $dst = |$src| abs packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "pabsd  $dst,$src\t# $dst = |$src| abs packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8I_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16I_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "evpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2L_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsV src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
"evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %} 17819 ins_cost(150); 17820 ins_encode %{ 17821 int vector_len = 1; 17822 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 17823 %} 17824 ins_pipe( pipe_slow ); 17825 %} 17826 17827 instruct vabs8L_reg(vecZ dst, vecZ src) %{ 17828 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 17829 match(Set dst (AbsV src)); 17830 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %} 17831 ins_cost(150); 17832 ins_encode %{ 17833 int vector_len = 2; 17834 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 17835 %} 17836 ins_pipe( pipe_slow ); 17837 %} 17838 17839 instruct vabs1D_reg(vecD dst) %{ 17840 predicate(UseSSE > 0 && n->as_Vector()->length() == 1); 17841 match(Set dst (AbsVD dst)); 17842 format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed1D" %} 17843 ins_cost(150); 17844 ins_encode %{ 17845 __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask())); 17846 %} 17847 ins_pipe( pipe_slow ); 17848 %} 17849 17850 instruct vabs2D_reg(vecX dst) %{ 17851 predicate(UseSSE > 0 && n->as_Vector()->length() == 2); 17852 match(Set dst (AbsVD dst)); 17853 format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed2D" %} 17854 ins_cost(150); 17855 ins_encode %{ 17856 __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask())); 17857 %} 17858 ins_pipe( pipe_slow ); 17859 %} 17860 17861 instruct vabs4D_reg(vecY dst, vecY src) %{ 17862 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 17863 match(Set dst (AbsVD src)); 17864 format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed4D" %} 17865 ins_cost(150); 17866 ins_encode %{ 17867 int vector_len = 1; 17868 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len); 17869 %} 17870 ins_pipe( pipe_slow ); 17871 %} 17872 17873 instruct vabs8D_reg(vecZ dst, vecZ src) %{ 17874 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 17875 match(Set dst (AbsVD src)); 17876 format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed8D" %} 17877 ins_cost(150); 17878 ins_encode %{ 17879 int vector_len = 2; 17880 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len); 17881 %} 17882 ins_pipe( pipe_slow ); 17883 %} 17884 17885 instruct vabs2F_reg(vecD dst) %{ 17886 predicate(UseSSE > 0 && n->as_Vector()->length() == 2); 17887 match(Set dst (AbsVF dst)); 17888 format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed2F" %} 17889 ins_cost(150); 17890 ins_encode %{ 17891 __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask())); 17892 %} 17893 ins_pipe( pipe_slow ); 17894 %} 17895 17896 instruct vabs4F_reg(vecX dst) %{ 17897 predicate(UseSSE > 0 && n->as_Vector()->length() == 4); 17898 match(Set dst (AbsVF dst)); 17899 format %{ "vandps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed4F" %} 17900 ins_cost(150); 17901 ins_encode %{ 17902 __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask())); 17903 %} 17904 ins_pipe( pipe_slow ); 17905 %} 17906 17907 instruct vabs8F_reg(vecY dst, vecY src) %{ 17908 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 17909 match(Set dst (AbsVF src)); 17910 format %{ "vandps $dst,$src\t# $dst = |$src| abs packed8F" %} 17911 ins_cost(150); 17912 ins_encode %{ 17913 int vector_len = 1; 17914 __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len); 17915 %} 17916 ins_pipe( 
instruct vabs16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- NOT --------------------------------------------
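// Note: there is no vector NOT instruction, so NotV is an xor against the
// all-bits-set constant. The SSE rules load the constant into $dst first
// (movdl/movq/movdqu by width) and pxor the source into it; the AVX rules use
// the memory-operand vpxor with a scratch register for the constant's address.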
instruct vnot4B(vecS dst, vecS src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor   $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot4B_reg(vecS dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor  $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor   $dst,$src\t! not vectors (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B_reg(vecD dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor  $dst,$src,0xFFFFFFFF\t! not vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor   $dst,$src\t! not vectors (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B_reg(vecX dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor  $dst,$src,0xFFFFFFFF\t! not vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot32B_reg(vecY dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor  $dst,$src,0xFFFFFFFF\t! not vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot64B_reg(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor  $dst,$src,0xFFFFFFFF\t! not vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
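// VectorTest: (v)ptest ands the operands and only sets flags (ZF, CF); setb
// then materializes the requested condition (carrySet or notZero) as a byte
// in a GP register, which movzbl zero-extends to a 0/1 int result.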
instruct vptest4inae(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest  $src1,$src2\n\t"
            "setb    $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest  $src1,$src2\n\t"
            "setb    $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest  $src1,$src2\n\t"
            "setb    $dst\t!" %}
  ins_encode %{
    int vector_len = 1;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2));
  format %{ "vptest  $src1,$src2\n\t"
            "setb    $dst\t!" %}
  ins_encode %{
    int vector_len = 1;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
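// VectorLoadMask widens a vector of 0/1 boolean bytes into full lane masks of
// 0/-1: subtract from zero (0 - 1 == all-ones in the lane), then sign-extend
// with pmovsxb{w,d,q} when the destination lane is wider than a byte. The
// 512-bit forms zero-extend first (vpmovzxb{d,q}) and subtract in wide lanes.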
instruct loadmask8b(vecD dst, vecD src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor   $dst,$dst\n\t"
            "psubb  $dst,$src\t! load mask (8B to 8B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16b(vecX dst, vecX src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor   $dst,$dst\n\t"
            "psubb  $dst,$src\t! load mask (16B to 16B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32b(vecY dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor  $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask64b(vecZ dst, vecZ src) %{
  predicate(UseAVX > 0 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor  $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4s(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor     $dst,$dst\n\t"
            "psubb    $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (4B to 4S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8s(vecX dst, vecD src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor     $dst,$dst\n\t"
            "psubb    $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (8B to 8S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct loadmask16s(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor     $dst,$dst\n\t"
            "vpsubb    $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (16B to 16S)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32s(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor     $dst,$dst\n\t"
            "vpsubb    $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (32B to 32S)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 1);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2i(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor     $dst,$dst\n\t"
            "psubb    $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (2B to 2I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4i(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor     $dst,$dst\n\t"
            "psubb    $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (4B to 4I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8i(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor     $dst,$dst\n\t"
            "vpsubb    $dst,$src\n\t"
            "vpmovsxbd $dst\t! load mask (8B to 8I)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16i(vecZ dst, vecX src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor     $dst,$dst\n\t"
            "vpmovzxbd $tmp,$src\n\t"
            "vpsubd    $dst,$tmp\t! load mask (16B to 16I)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbd($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask1l(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor     $dst,$dst\n\t"
            "psubb    $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (1B to 1L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2l(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor     $dst,$dst\n\t"
            "psubb    $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (2B to 2L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4l(vecY dst, vecS src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor     $dst,$dst\n\t"
            "vpsubb    $dst,$src\n\t"
            "vpmovsxbq $dst\t! load mask (4B to 4L)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8l(vecZ dst, vecD src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor     $dst,$dst\n\t"
            "vpmovzxbq $tmp,$src\n\t"
            "vpsubq    $dst,$tmp\t! load mask (8B to 8L)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbq($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
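// VectorStoreMask is the inverse narrowing: lane masks of 0/-1 become 0/1
// bytes. pabs* turns -1 into 1 and vpackus* narrows the lanes; the AVX-512BW
// forms compare against all-ones into a k-register and then do a masked load
// of the 0x01 byte pattern (vector_byte_bitset).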
load mask (8B to 8L)" %} 18316 ins_encode %{ 18317 int vector_len = 2; 18318 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18319 __ vpmovzxbq($tmp$$XMMRegister, $src$$XMMRegister, vector_len); 18320 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18321 %} 18322 ins_pipe( pipe_slow ); 18323 %} 18324 18325 instruct storemask8b(vecD dst, vecD src) %{ 18326 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1); 18327 match(Set dst (VectorStoreMask src)); 18328 format %{ "vpabsb $dst,$src\t! store mask (8B to 8B)" %} 18329 ins_encode %{ 18330 int vector_len = 0; 18331 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18332 %} 18333 ins_pipe( pipe_slow ); 18334 %} 18335 18336 instruct storemask16b(vecX dst, vecX src) %{ 18337 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1); 18338 match(Set dst (VectorStoreMask src)); 18339 format %{ "vpabsb $dst,$src\t! store mask (16B to 16B)" %} 18340 ins_encode %{ 18341 int vector_len = 0; 18342 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18343 %} 18344 ins_pipe( pipe_slow ); 18345 %} 18346 18347 instruct storemask32b(vecY dst, vecY src) %{ 18348 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1); 18349 match(Set dst (VectorStoreMask src)); 18350 format %{ "vpabsb $dst,$src\t! store mask (32B to 32B)" %} 18351 ins_encode %{ 18352 int vector_len = 1; 18353 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18354 %} 18355 ins_pipe( pipe_slow ); 18356 %} 18357 18358 instruct storemask64b(vecZ dst, vecZ src, rRegL scratch) %{ 18359 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1); 18360 match(Set dst (VectorStoreMask src)); 18361 effect(TEMP scratch); 18362 format %{ "vpcmpeqb k2,$src,0xFFFFFFFF\n\t" 18363 "vmovdqub $dst,k2,0x01010101\t! store mask (64B to 64B)" %} 18364 ins_encode %{ 18365 int vector_len = 2; 18366 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 18367 Assembler::ComparisonPredicate cp = Assembler::eq; 18368 __ evpcmpb(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 18369 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, vector_len, $scratch$$Register); 18370 %} 18371 ins_pipe( pipe_slow ); 18372 %} 18373 18374 instruct storemask4s(vecS dst, vecD src) %{ 18375 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 18376 match(Set dst (VectorStoreMask src)); 18377 format %{ "vpabsw $dst,$src\n\t" 18378 "vpackuswb $dst,$dst,$dst\t! store mask (4S to 4B)" %} 18379 ins_encode %{ 18380 int vector_len = 0; 18381 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18382 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18383 %} 18384 ins_pipe( pipe_slow ); 18385 %} 18386 18387 instruct storemask8s(vecD dst, vecX src) %{ 18388 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 18389 match(Set dst (VectorStoreMask src)); 18390 format %{ "vpabsw $dst,$src\n\t" 18391 "vpackuswb $dst,$dst,$dst\t! 
store mask (8S to 8B)" %} 18392 ins_encode %{ 18393 int vector_len = 0; 18394 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18395 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18396 %} 18397 ins_pipe( pipe_slow ); 18398 %} 18399 18400 instruct storemask16s(vecX dst, vecY src, vecY tmp) %{ 18401 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 18402 match(Set dst (VectorStoreMask src)); 18403 effect(TEMP dst, TEMP tmp); 18404 format %{ "vpabsw $dst,$src\n\t" 18405 "vextracti128 $tmp,$dst\n\t" 18406 "vpackuswb $dst,$dst,$tmp\t! store mask (16S to 16B)" %} 18407 ins_encode %{ 18408 int vector_len = 1; 18409 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18410 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 18411 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18412 %} 18413 ins_pipe( pipe_slow ); 18414 %} 18415 18416 instruct storemask32s(vecY dst, vecZ src, rRegL scratch) %{ 18417 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 18418 match(Set dst (VectorStoreMask src)); 18419 effect(TEMP scratch); 18420 format %{ "vpcmpeqw k2,$src,0xFFFFFFFF\n\t" 18421 "vmovdqub $dst,k2,0x01010101\t! store mask (32S to 32B)" %} 18422 ins_encode %{ 18423 int vector_len = 2; 18424 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 18425 Assembler::ComparisonPredicate cp = Assembler::eq; 18426 __ evpcmpw(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 18427 // The dst is 256-bit - thus we can do a smaller move. 18428 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 1, $scratch$$Register); 18429 %} 18430 ins_pipe( pipe_slow ); 18431 %} 18432 18433 18434 instruct storemask2i(vecS dst, vecD src) %{ 18435 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 18436 match(Set dst (VectorStoreMask src)); 18437 format %{ "vpabsd $dst,$src\n\t" 18438 "vpackusdw $dst,$dst,$dst\n\t" 18439 "vpackuswb $dst,$dst,$dst\t! store mask (2I to 2B)" %} 18440 ins_encode %{ 18441 int vector_len = 0; 18442 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18443 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18444 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18445 %} 18446 ins_pipe( pipe_slow ); 18447 %} 18448 18449 instruct storemask4i(vecS dst, vecX src) %{ 18450 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 18451 match(Set dst (VectorStoreMask src)); 18452 format %{ "vpabsd $dst,$src\n\t" 18453 "vpackusdw $dst,$dst,$dst\n\t" 18454 "vpackuswb $dst,$dst,$dst\t! 
store mask (4I to 4B)" %} 18455 ins_encode %{ 18456 int vector_len = 0; 18457 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18458 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18459 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18460 %} 18461 ins_pipe( pipe_slow ); 18462 %} 18463 18464 instruct storemask8i(vecD dst, vecY src, vecY tmp) %{ 18465 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 18466 match(Set dst (VectorStoreMask src)); 18467 effect(TEMP dst, TEMP tmp); 18468 format %{ "vpxor $dst,$dst\n\t" 18469 "vpsubd $dst,$src\n\t" 18470 "vextracti128 $tmp,$dst\n\t" 18471 "vpackusdw $dst,$dst,$tmp\n\t" 18472 "vpackuswb $dst,$dst,$dst\t! store mask (8I to 8B)" %} 18473 ins_encode %{ 18474 int vector_len = 1; 18475 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18476 __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len); 18477 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 18478 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18479 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 18480 %} 18481 ins_pipe( pipe_slow ); 18482 %} 18483 18484 instruct storemask16i(vecX dst, vecZ src, rRegL scratch) %{ 18485 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 18486 match(Set dst (VectorStoreMask src)); 18487 effect(TEMP scratch); 18488 format %{ "vpcmpeqd k2,$src,0xFFFFFFFF\n\t" 18489 "vmovdqub $dst,k2,0x01010101\t! store mask (16I to 16B)" %} 18490 ins_encode %{ 18491 int vector_len = 2; 18492 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 18493 __ evpcmpeqd(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 18494 // The dst is only 128-bit - thus we can do a smaller move. 18495 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 0, $scratch$$Register); 18496 %} 18497 ins_pipe( pipe_slow ); 18498 %} 18499 18500 instruct storemask1l(vecS dst, vecD src) %{ 18501 predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 18502 match(Set dst (VectorStoreMask src)); 18503 format %{ "vpabsd $dst,$src\n\t" 18504 "vpackusdw $dst,$dst,$dst\n\t" 18505 "vpackuswb $dst,$dst,$dst\t! store mask (1L to 1B)" %} 18506 ins_encode %{ 18507 int vector_len = 0; 18508 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18509 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18510 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18511 %} 18512 ins_pipe( pipe_slow ); 18513 %} 18514 18515 instruct storemask2l(vecS dst, vecX src) %{ 18516 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 18517 match(Set dst (VectorStoreMask src)); 18518 format %{ "vpshufd $dst,$src,0x8\n\t" 18519 "vpabsd $dst,$dst\n\t" 18520 "vpackusdw $dst,$dst,$dst\n\t" 18521 "vpackuswb $dst,$dst,$dst\t! 
store mask (2L to 2B)" %} 18522 ins_encode %{ 18523 int vector_len = 0; 18524 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8, vector_len); 18525 __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18526 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18527 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18528 %} 18529 ins_pipe( pipe_slow ); 18530 %} 18531 18532 instruct storemask4l(vecS dst, vecY src, rRegL scratch) %{ 18533 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 18534 match(Set dst (VectorStoreMask src)); 18535 effect(TEMP scratch, TEMP dst); 18536 format %{ "vmovdqu $dst,[0,2,4,6,1,3,5,7]\n\t" 18537 "vpermd $dst,$dst,$src," 18538 "vpabsd $dst,$dst\n\t" 18539 "vpackusdw $dst,$dst,$dst\n\t" 18540 "vpackuswb $dst,$dst,$dst\t! store mask (4L to 4B)" %} 18541 ins_encode %{ 18542 // vpermd and load are 256-bit, but all others are 128-bit instructions. 18543 int vector_len = 0; 18544 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_long_perm_mask()), $scratch$$Register); 18545 __ vpermd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister); 18546 __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18547 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18548 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18549 %} 18550 ins_pipe( pipe_slow ); 18551 %} 18552 18553 instruct storemask8l(vecD dst, vecZ src, rRegL scratch) %{ 18554 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 18555 match(Set dst (VectorStoreMask src)); 18556 effect(TEMP scratch); 18557 format %{ "vpcmpeqq k2,$src,0xFFFFFFFF\n\t" 18558 "vmovdqub $dst,k2,0x01010101\t! store mask (8L to 8B)" %} 18559 ins_encode %{ 18560 int vector_len = 2; 18561 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 18562 Assembler::ComparisonPredicate cp = Assembler::eq; 18563 __ evpcmpq(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 18564 // The dst is only 128-bit - thus we can do a smaller move. 

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
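
// A fused multiply-add evaluates a * b + c with a single rounding at the
// end. A minimal double-precision illustration of the difference (not taken
// from this file):
//   a = 1 + 2^-52, b = 1 - 2^-52, c = -1
//   unfused: round(a * b) = 1.0, so round(a * b) + c == 0.0
//   fused:   fma(a, b, c) == -2^-104 (the tiny product term is preserved)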

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
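
// Note: each _mem variant above folds its (LoadVector b) input directly into
// the FMA instruction as a memory operand ($b$$Address), avoiding a separate
// vector load.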

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
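
// The vpopcntd patterns above require the AVX512_VPOPCNTDQ extension (see
// VM_Version::supports_vpopcntdq()): each 32-bit lane is replaced by its
// set-bit count, e.g. a lane holding 0xF0F00001 becomes 9. The same
// instruction handles every vector size; only vector_len (0 = 128-bit,
// 1 = 256-bit, 2 = 512-bit) differs.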