//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
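//
// For example, the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares the low word of xmm0 as save-on-call under both the VM and
// the C calling convention, spilled as a float (Op_RegF), with hardware
// encoding 0 and backed by the VMReg returned by xmm0->as_VMReg().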

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No registers preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
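
// A reg_class_dynamic chooses between two statically defined classes
// whenever the register mask is queried, based on the %{ ... %} test.
// Morally it behaves like this C++ sketch (hypothetical names; the real
// accessor is generated by the ADLC from the declaration above):
//
//   const RegMask& double_reg_mask() {
//     return VM_Version::supports_evex() ? double_reg_evex_mask
//                                        : double_reg_legacy_mask;
//   }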

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
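
// Note: 512-bit vectors are only available on EVEX-capable hardware, so
// the 512-bit class below has no legacy variant and needs no
// reg_class_dynamic selector.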

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);
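
// Note: xmm0_reg/ymm0_reg/zmm0_reg above pin the allocator to register 0
// at each vector width. (Assumed rationale, for illustration only: some
// instructions, e.g. the SSE4.1 blendv forms, read XMM0 as an implicit
// operand, so their operands must be constrained to exactly that
// register.)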
rest of the architecture description.
1080
1081 source_hpp %{
1082 // Header information of the source block.
1083 // Method declarations/definitions which are used outside
1084 // the ad-scope can conveniently be defined here.
1085 //
1086 // To keep related declarations/definitions/uses close together,
1087 // we switch between source %{ %} and source_hpp %{ %} freely as needed.
1088
1089 class NativeJump;
1090
1091 class CallStubImpl {
1092
1093 //--------------------------------------------------------------
1094 //---< Used for optimization in Compile::shorten_branches >---
1095 //--------------------------------------------------------------
1096
1097 public:
1098 // Size of call trampoline stub.
1099 static uint size_call_trampoline() {
1100 return 0; // no call trampolines on this platform
1101 }
1102
1103 // Number of relocations needed by a call trampoline stub.
1104 static uint reloc_call_trampoline() {
1105 return 0; // no call trampolines on this platform
1106 }
1107 };
1108
1109 class HandlerImpl {
1110
1111 public:
1112
1113 static int emit_exception_handler(CodeBuffer &cbuf);
1114 static int emit_deopt_handler(CodeBuffer& cbuf);
1115
1116 static uint size_exception_handler() {
1117 // NativeCall instruction size is the same as NativeJump.
1118 // The exception handler starts out as a jump and can be patched to
1119 // a call by deoptimization. (4932387)
1120 // Note that this value is also credited (in output.cpp) to
1121 // the size of the code section.
1122 return NativeJump::instruction_size;
1123 }
1124
1125 #ifdef _LP64
1126 static uint size_deopt_handler() {
1127 // three 5 byte instructions
1128 return 15;
1129 }
1130 #else
1131 static uint size_deopt_handler() {
1132 // NativeCall instruction size is the same as NativeJump.
1133 // The exception handler starts out as a jump and can be patched to
1134 // a call by deoptimization. (4932387)
1135 // Note that this value is also credited (in output.cpp) to
1136 // the size of the code section.
1137 return 5 + NativeJump::instruction_size; // pushl(); jmp;
1138 }
1139 #endif
1140 };
1141
1142 %} // end source_hpp
1143
1144 source %{
1145
1146 #include "opto/addnode.hpp"
1147
1148 // Emit exception handler code.
1149 // Stuff framesize into a register and call a VM stub routine.
1150 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1151
1152 // Note that the code buffer's insts_mark is always relative to insts.
1153 // That's why we must use the macroassembler to generate a handler.
1154 MacroAssembler _masm(&cbuf);
1155 address base = __ start_a_stub(size_exception_handler());
1156 if (base == NULL) {
1157 ciEnv::current()->record_failure("CodeCache is full");
1158 return 0; // CodeBuffer::expand failed
1159 }
1160 int offset = __ offset();
1161 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1162 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1163 __ end_a_stub();
1164 return offset;
1165 }
1166
1167 // Emit deopt handler code.
1168 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1169
1170 // Note that the code buffer's insts_mark is always relative to insts.
1171 // That's why we must use the macroassembler to generate a handler.
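// Rough shape of the stub emitted below on LP64; it must fit in the
// size_deopt_handler() budget of three 5-byte instructions:
//   call next                ; pushes the address of the next instruction
//   next: subptr [rsp], <n>  ; subtracts the call's length so the slot holds the_pc
//   jmp   <deopt unpack>     ; jumps to SharedRuntime::deopt_blob()->unpack()
// On 32-bit VMs the stub is a pushptr of the current pc followed by that jump.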
1172 MacroAssembler _masm(&cbuf); 1173 address base = __ start_a_stub(size_deopt_handler()); 1174 if (base == NULL) { 1175 ciEnv::current()->record_failure("CodeCache is full"); 1176 return 0; // CodeBuffer::expand failed 1177 } 1178 int offset = __ offset(); 1179 1180 #ifdef _LP64 1181 address the_pc = (address) __ pc(); 1182 Label next; 1183 // push a "the_pc" on the stack without destroying any registers 1184 // as they all may be live. 1185 1186 // push address of "next" 1187 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1188 __ bind(next); 1189 // adjust it so it matches "the_pc" 1190 __ subptr(Address(rsp, 0), __ offset() - offset); 1191 #else 1192 InternalAddress here(__ pc()); 1193 __ pushptr(here.addr()); 1194 #endif 1195 1196 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1197 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); 1198 __ end_a_stub(); 1199 return offset; 1200 } 1201 1202 1203 //============================================================================= 1204 1205 // Float masks come from different places depending on platform. 1206 #ifdef _LP64 1207 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1208 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1209 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1210 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1211 static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); } 1212 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); } 1213 static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); } 1214 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); } 1215 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1216 static address vector_byte_bitset() { return StubRoutines::x86::vector_byte_bitset(); } 1217 static address vector_long_perm_mask() { return StubRoutines::x86::vector_long_perm_mask(); } 1218 static address vector_byte_saturationmask() { return StubRoutines::x86::vector_byte_saturation_mask(); } 1219 #else 1220 static address float_signmask() { return (address)float_signmask_pool; } 1221 static address float_signflip() { return (address)float_signflip_pool; } 1222 static address double_signmask() { return (address)double_signmask_pool; } 1223 static address double_signflip() { return (address)double_signflip_pool; } 1224 #endif 1225 1226 1227 const bool Matcher::match_rule_supported(int opcode) { 1228 if (!has_match_rule(opcode)) 1229 return false; 1230 1231 bool ret_value = true; 1232 switch (opcode) { 1233 case Op_PopCountI: 1234 case Op_PopCountL: 1235 if (!UsePopCountInstruction) 1236 ret_value = false; 1237 break; 1238 case Op_PopCountVI: 1239 if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq()) 1240 ret_value = false; 1241 break; 1242 case Op_MulVB: 1243 case Op_MulVI: 1244 case Op_MulVL: 1245 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 1246 ret_value = false; 1247 break; 1248 case Op_MulReductionVL: 1249 if (VM_Version::supports_avx512dq() == false) 1250 ret_value = false; 1251 break; 1252 case Op_AddReductionVL: 1253 if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here 1254 ret_value = false; 1255 break; 1256 case Op_AddReductionVI: 1257 if (UseSSE < 3) // requires 
at least SSE3
1258 ret_value = false;
1259 break;
1260 case Op_MulReductionVI:
1261 if (UseSSE < 4) // requires at least SSE4
1262 ret_value = false;
1263 break;
1264 case Op_AddReductionVF:
1265 case Op_AddReductionVD:
1266 case Op_MulReductionVF:
1267 case Op_MulReductionVD:
1268 if (UseSSE < 1) // requires at least SSE
1269 ret_value = false;
1270 break;
1271 case Op_AndReductionV:
1272 if (UseSSE < 2) // requires at least SSE2
1273 ret_value = false;
1274 break;
1275 case Op_SqrtVD:
1276 case Op_SqrtVF:
1277 if (UseAVX < 1) // enabled for AVX only
1278 ret_value = false;
1279 break;
1280 case Op_CompareAndSwapL:
1281 #ifdef _LP64
1282 case Op_CompareAndSwapP:
1283 #endif
1284 if (!VM_Version::supports_cx8())
1285 ret_value = false;
1286 break;
1287 case Op_CMoveVF:
1288 case Op_CMoveVD:
1289 if (UseAVX < 1 || UseAVX > 2)
1290 ret_value = false;
1291 break;
1292 case Op_StrIndexOf:
1293 if (!UseSSE42Intrinsics)
1294 ret_value = false;
1295 break;
1296 case Op_StrIndexOfChar:
1297 if (!UseSSE42Intrinsics)
1298 ret_value = false;
1299 break;
1300 case Op_OnSpinWait:
1301 if (VM_Version::supports_on_spin_wait() == false)
1302 ret_value = false;
1303 break;
1304 }
1305
1306 return ret_value; // By default, match rules are supported.
1307 }
1308
1309 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt, int op_arity) {
1310 // Identify extra cases that we might want to provide match rules for,
1311 // e.g. Op_* vector nodes and other intrinsics, guarding on vlen.
1312 bool ret_value = match_rule_supported(opcode);
1313 if (ret_value) {
1314 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
1315 if (!vector_size_supported(bt, vlen)) {
1316 ret_value = false;
1317 } else if (size_in_bits > 256 && UseAVX <= 2) {
1318 // Only AVX512 supports 512-bit vectors
1319 ret_value = false;
1320 } else if (UseAVX == 0 && size_in_bits > 128) {
1321 // Only AVX supports 256-bit vectors
1322 ret_value = false;
1323 } else if (is_subword_type(bt) && size_in_bits == 512 && VM_Version::supports_avx512bw() == false) {
1324 // Byte and Short types are not supported in AVX512 if AVX512BW is not supported.
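// (AVX512BW is the AVX-512 extension that adds byte- and word-element
// operations on the 512-bit zmm registers; base AVX-512F only provides
// dword and qword element types.)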
1325 ret_value = false;
1326 } else {
1327 switch (opcode) {
1328 case Op_AbsV:
1329 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
1330 else if (bt == T_LONG && UseAVX <= 2) { ret_value = false; } // Implementation limitation
1331 break;
1332 case Op_AddVB:
1333 case Op_SubVB:
1334 if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
1335 ret_value = false;
1336 break;
1337 case Op_MaxV:
1338 case Op_MinV:
1339 if ((UseSSE < 4 && (bt == T_BYTE || bt == T_INT)) || (UseAVX < 1 && bt == T_LONG))
1340 ret_value = false;
1341 break;
1342 case Op_LShiftVI:
1343 case Op_RShiftVI:
1344 case Op_URShiftVI:
1345 if (op_arity == 2 && UseAVX <= 1)
1346 ret_value = false;
1347 break;
1348 case Op_LShiftVL:
1349 case Op_RShiftVL:
1350 case Op_URShiftVL:
1351 if (op_arity == 2 && UseAVX <= 1)
1352 ret_value = false;
1353 break;
1354 case Op_URShiftVS:
1355 case Op_RShiftVS:
1356 case Op_LShiftVS:
1357 case Op_MulVS:
1358 case Op_AddVS:
1359 case Op_SubVS:
1360 if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
1361 ret_value = false;
1362 break;
1363 case Op_CMoveVF:
1364 if (vlen != 8)
1365 ret_value = false;
1366 break;
1367 case Op_CMoveVD:
1368 if (vlen != 4)
1369 ret_value = false;
1370 break;
1371 case Op_VectorMaskCmp:
1372 if (UseAVX <= 0) { ret_value = false; }
1373 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
1374 break;
1375 case Op_VectorBlend:
1376 if (UseSSE <= 3 && UseAVX == 0) { ret_value = false; }
1377 else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
1378 break;
1379 case Op_VectorTest:
1380 if (UseAVX <= 0) { ret_value = false; }
1381 else if (size_in_bits != 128 && size_in_bits != 256) { ret_value = false; } // Implementation limitation
1382 break;
1383 case Op_VectorLoadMask:
1384 if (UseSSE <= 3) { ret_value = false; }
1385 else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation
1386 break;
1387 case Op_VectorStoreMask:
1388 if (UseAVX < 2) { ret_value = false; } // Implementation limitation
1389 else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation
1390 else if (size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; } // Implementation limitation
1391 break;
1392 default:
1393 break;
1394 }
1395 }
1396 }
1397 if (ret_value) {
1398 assert(is_java_primitive(bt) && (vlen > 0) && is_power_of_2(vlen) &&
1399 vector_size_supported(bt, vlen), "must be supported");
1400 }
1401
1402 return ret_value; // By default, match rules are supported.
1403 }
1404
1405 const bool Matcher::has_predicated_vectors(void) {
1406 bool ret_value = false;
1407 if (UseAVX > 2) {
1408 ret_value = VM_Version::supports_avx512vl();
1409 }
1410
1411 return ret_value;
1412 }
1413
1414 const int Matcher::float_pressure(int default_pressure_threshold) {
1415 int float_pressure_threshold = default_pressure_threshold;
1416 #ifdef _LP64
1417 if (UseAVX > 2) {
1418 // Increase pressure threshold on machines with AVX3, which have
1419 // 2x as many XMM registers.
1420 float_pressure_threshold = default_pressure_threshold * 2;
1421 }
1422 #endif
1423 return float_pressure_threshold;
1424 }
1425
1426 // Max vector size in bytes. 0 if not supported.
1427 const int Matcher::vector_width_in_bytes(BasicType bt) {
1428 assert(is_java_primitive(bt), "only primitive type vectors");
1429 if (UseSSE < 2) return 0;
1430 // SSE2 supports 128bit vectors for all types.
1431 // AVX2 supports 256bit vectors for all types.
1432 // EVEX (AVX-512) supports 512bit vectors for all types.
1433 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; // e.g. UseAVX == 2 -> 32 bytes, UseAVX == 3 -> 64 bytes
1434 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1435 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1436 size = (UseAVX > 2) ? 64 : 32;
1437 // Use flag to limit vector size.
1438 size = MIN2(size,(int)MaxVectorSize);
1439 // Minimum 2 values in vector (or 4 for bytes).
1440 switch (bt) {
1441 case T_DOUBLE:
1442 case T_LONG:
1443 if (size < 16) return 0;
1444 break;
1445 case T_FLOAT:
1446 case T_INT:
1447 if (size < 8) return 0;
1448 break;
1449 case T_BOOLEAN:
1450 if (size < 4) return 0;
1451 break;
1452 case T_CHAR:
1453 if (size < 4) return 0;
1454 break;
1455 case T_BYTE:
1456 if (size < 4) return 0;
1457 break;
1458 case T_SHORT:
1459 if (size < 4) return 0;
1460 break;
1461 default:
1462 ShouldNotReachHere();
1463 }
1464 return size;
1465 }
1466
1467 // Limits on vector size (number of elements) loaded into vector.
1468 const int Matcher::max_vector_size(const BasicType bt) {
1469 return vector_width_in_bytes(bt)/type2aelembytes(bt);
1470 }
1471 const int Matcher::min_vector_size(const BasicType bt) {
1472 int max_size = max_vector_size(bt);
1473 // Min size which can be loaded into vector is 4 bytes.
1474 int size = (type2aelembytes(bt) == 1) ? 4 : 2;
1475 return MIN2(size,max_size);
1476 }
1477
1478 // Vector ideal reg corresponding to specified size in bytes
1479 const uint Matcher::vector_ideal_reg(int size) {
1480 assert(MaxVectorSize >= size, "");
1481 switch(size) {
1482 case 4: return Op_VecS;
1483 case 8: return Op_VecD;
1484 case 16: return Op_VecX;
1485 case 32: return Op_VecY;
1486 case 64: return Op_VecZ;
1487 }
1488 ShouldNotReachHere();
1489 return 0;
1490 }
1491
1492 // Only lowest bits of xmm reg are used for vector shift count.
1493 const uint Matcher::vector_shift_count_ideal_reg(int size) {
1494 return Op_VecS;
1495 }
1496
1497 // x86 supports misaligned vector stores/loads.
1498 const bool Matcher::misaligned_vectors_ok() {
1499 return !AlignVector; // can be changed by flag
1500 }
1501
1502 // x86 AES instructions are compatible with SunJCE expanded
1503 // keys, hence we do not need to pass the original key to stubs.
1504 const bool Matcher::pass_original_key_for_aes() {
1505 return false;
1506 }
1507
1508
1509 const bool Matcher::convi2l_type_required = true;
1510
1511 // Check for shift by small constant as well
1512 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
1513 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
1514 shift->in(2)->get_int() <= 3 &&
1515 // Are there other uses besides address expressions?
1516 !matcher->is_visited(shift)) {
1517 address_visited.set(shift->_idx); // Flag as address_visited
1518 mstack.push(shift->in(2), Matcher::Visit);
1519 Node *conv = shift->in(1);
1520 #ifdef _LP64
1521 // Allow the Matcher to match the rule which bypasses
1522 // the ConvI2L operation for an array index on LP64
1523 // if the index value is positive.
1524 if (conv->Opcode() == Op_ConvI2L &&
1525 conv->as_Type()->type()->is_long()->_lo >= 0 &&
1526 // Are there other uses besides address expressions?
1527 !matcher->is_visited(conv)) {
1528 address_visited.set(conv->_idx); // Flag as address_visited
1529 mstack.push(conv->in(1), Matcher::Pre_Visit);
1530 } else
1531 #endif
1532 mstack.push(conv, Matcher::Pre_Visit);
1533 return true;
1534 }
1535 return false;
1536 }
1537
1538 // Should the Matcher clone shifts on addressing modes, expecting them
1539 // to be subsumed into complex addressing expressions or compute them
1540 // into registers?
1541 bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
1542 Node *off = m->in(AddPNode::Offset);
1543 if (off->is_Con()) {
1544 address_visited.test_set(m->_idx); // Flag as address_visited
1545 Node *adr = m->in(AddPNode::Address);
1546
1547 // Intel can handle 2 adds in addressing mode
1548 // AtomicAdd is not an addressing expression.
1549 // Cheap to find it by looking for screwy base.
1550 if (adr->is_AddP() &&
1551 !adr->in(AddPNode::Base)->is_top() &&
1552 // Are there other uses besides address expressions?
1553 !is_visited(adr)) {
1554 address_visited.set(adr->_idx); // Flag as address_visited
1555 Node *shift = adr->in(AddPNode::Offset);
1556 if (!clone_shift(shift, this, mstack, address_visited)) {
1557 mstack.push(shift, Pre_Visit);
1558 }
1559 mstack.push(adr->in(AddPNode::Address), Pre_Visit);
1560 mstack.push(adr->in(AddPNode::Base), Pre_Visit);
1561 } else {
1562 mstack.push(adr, Pre_Visit);
1563 }
1564
1565 // Clone X+offset as it also folds into most addressing expressions
1566 mstack.push(off, Visit);
1567 mstack.push(m->in(AddPNode::Base), Pre_Visit);
1568 return true;
1569 } else if (clone_shift(off, this, mstack, address_visited)) {
1570 address_visited.test_set(m->_idx); // Flag as address_visited
1571 mstack.push(m->in(AddPNode::Address), Pre_Visit);
1572 mstack.push(m->in(AddPNode::Base), Pre_Visit);
1573 return true;
1574 }
1575 return false;
1576 }
1577
1578 void Compile::reshape_address(AddPNode* addp) {
1579 }
1580
1581 // Helper methods for MachSpillCopyNode::implementation().
1582 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
1583 int src_hi, int dst_hi, uint ireg, outputStream* st) {
1584 // In the 64-bit VM size calculation is very complex, so the size is
1585 // obtained by emitting the instructions into a scratch buffer.
1586 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1587 assert(ireg == Op_VecS || // 32bit vector
1588 (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
1589 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
1590 "no non-adjacent vector moves" );
1591 if (cbuf) {
1592 MacroAssembler _masm(cbuf);
1593 int offset = __ offset();
1594 switch (ireg) {
1595 case Op_VecS: // copy whole register
1596 case Op_VecD:
1597 case Op_VecX:
1598 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1599 break;
1600 case Op_VecY:
1601 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
1602 break;
1603 case Op_VecZ:
1604 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
1605 break;
1606 default:
1607 ShouldNotReachHere();
1608 }
1609 int size = __ offset() - offset;
1610 #ifdef ASSERT
1611 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
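// For example, a reg-to-reg copy 'movdqu xmm1, xmm0' encodes as
// F3 0F 6F C8 with SSE and as C5 FA 6F C8 with 2-byte VEX -- 4 bytes either way.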
1612 assert(!do_size || size == 4, "incorrect size calculation");
1613 #endif
1614 return size;
1615 #ifndef PRODUCT
1616 } else if (!do_size) {
1617 switch (ireg) {
1618 case Op_VecS:
1619 case Op_VecD:
1620 case Op_VecX:
1621 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1622 break;
1623 case Op_VecY:
1624 case Op_VecZ:
1625 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
1626 break;
1627 default:
1628 ShouldNotReachHere();
1629 }
1630 #endif
1631 }
1632 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
1633 return (UseAVX > 2) ? 6 : 4;
1634 }
1635
1636 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
1637 int stack_offset, int reg, uint ireg, outputStream* st) {
1638 // In the 64-bit VM size calculation is very complex, so the size is
1639 // obtained by emitting the instructions into a scratch buffer.
1640 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
1641 if (cbuf) {
1642 MacroAssembler _masm(cbuf);
1643 int offset = __ offset();
1644 if (is_load) {
1645 switch (ireg) {
1646 case Op_VecS:
1647 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1648 break;
1649 case Op_VecD:
1650 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1651 break;
1652 case Op_VecX:
1653 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1654 break;
1655 case Op_VecY:
1656 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
1657 break;
1658 case Op_VecZ:
1659 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
1660 break;
1661 default:
1662 ShouldNotReachHere();
1663 }
1664 } else { // store
1665 switch (ireg) {
1666 case Op_VecS:
1667 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1668 break;
1669 case Op_VecD:
1670 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1671 break;
1672 case Op_VecX:
1673 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1674 break;
1675 case Op_VecY:
1676 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
1677 break;
1678 case Op_VecZ:
1679 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
1680 break;
1681 default:
1682 ShouldNotReachHere();
1683 }
1684 }
1685 int size = __ offset() - offset;
1686 #ifdef ASSERT
1687 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
1688 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
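// (Roughly: 2 prefix bytes + opcode + ModRM + SIB for an rsp-based address
// make up the 5 fixed bytes; offset_size adds the displacement bytes.)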
1689 assert(!do_size || size == (5+offset_size), "incorrect size calculation");
1690 #endif
1691 return size;
1692 #ifndef PRODUCT
1693 } else if (!do_size) {
1694 if (is_load) {
1695 switch (ireg) {
1696 case Op_VecS:
1697 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1698 break;
1699 case Op_VecD:
1700 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1701 break;
1702 case Op_VecX:
1703 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1704 break;
1705 case Op_VecY:
1706 case Op_VecZ:
1707 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
1708 break;
1709 default:
1710 ShouldNotReachHere();
1711 }
1712 } else { // store
1713 switch (ireg) {
1714 case Op_VecS:
1715 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1716 break;
1717 case Op_VecD:
1718 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1719 break;
1720 case Op_VecX:
1721 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1722 break;
1723 case Op_VecY:
1724 case Op_VecZ:
1725 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
1726 break;
1727 default:
1728 ShouldNotReachHere();
1729 }
1730 }
1731 #endif
1732 }
1733 bool is_single_byte = false;
1734 int vec_len = 0;
1735 if ((UseAVX > 2) && (stack_offset != 0)) {
1736 int tuple_type = Assembler::EVEX_FVM;
1737 int input_size = Assembler::EVEX_32bit;
1738 switch (ireg) {
1739 case Op_VecS:
1740 tuple_type = Assembler::EVEX_T1S;
1741 break;
1742 case Op_VecD:
1743 tuple_type = Assembler::EVEX_T1S;
1744 input_size = Assembler::EVEX_64bit;
1745 break;
1746 case Op_VecX:
1747 break;
1748 case Op_VecY:
1749 vec_len = 1;
1750 break;
1751 case Op_VecZ:
1752 vec_len = 2;
1753 break;
1754 }
1755 is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
1756 }
1757 int offset_size = 0;
1758 int size = 5;
1759 if (UseAVX > 2) {
1760 if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
1761 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
1762 size += 2; // Need an additional two bytes for EVEX encoding
1763 } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
1764 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
1765 } else {
1766 offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
1767 size += 2; // Need an additional two bytes for EVEX encoding
1768 }
1769 } else {
1770 offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
1771 }
1772 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
1773 return size+offset_size;
1774 }
1775
1776 static inline jint replicate4_imm(int con, int width) {
1777 // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
1778 assert(width == 1 || width == 2, "only byte or short types here");
1779 int bit_width = width * 8;
1780 jint val = con;
1781 val &= (1 << bit_width) - 1; // mask off sign bits
1782 while(bit_width < 32) {
1783 val |= (val << bit_width);
1784 bit_width <<= 1;
1785 }
1786 return val;
1787 }
1788
1789 static inline jlong replicate8_imm(int con, int width) {
1790 // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
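// For example, replicate8_imm(0x1F, 1) yields 0x1F1F1F1F1F1F1F1F, and
// replicate8_imm(-1, 2) yields 0xFFFFFFFFFFFFFFFF (sign bits are masked off first).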
1791 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
1792 int bit_width = width * 8;
1793 jlong val = con;
1794 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
1795 while(bit_width < 64) {
1796 val |= (val << bit_width);
1797 bit_width <<= 1;
1798 }
1799 return val;
1800 }
1801
1802 #ifndef PRODUCT
1803 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
1804 st->print("nop \t# %d bytes pad for loops and calls", _count);
1805 }
1806 #endif
1807
1808 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1809 MacroAssembler _masm(&cbuf);
1810 __ nop(_count);
1811 }
1812
1813 uint MachNopNode::size(PhaseRegAlloc*) const {
1814 return _count;
1815 }
1816
1817 #ifndef PRODUCT
1818 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
1819 st->print("# breakpoint");
1820 }
1821 #endif
1822
1823 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
1824 MacroAssembler _masm(&cbuf);
1825 __ int3();
1826 }
1827
1828 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
1829 return MachNode::size(ra_);
1830 }
1831
1832 %}
1833
1834 encode %{
1835
1836 enc_class call_epilog %{
1837 if (VerifyStackAtCalls) {
1838 // Check that stack depth is unchanged: find magic cookie on stack
1839 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
1840 MacroAssembler _masm(&cbuf);
1841 Label L;
1842 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
1843 __ jccb(Assembler::equal, L);
1844 // Die if stack mismatch
1845 __ int3();
1846 __ bind(L);
1847 }
1848 %}
1849
1850 %}
1851
1852
1853 //----------OPERANDS-----------------------------------------------------------
1854 // Operand definitions must precede instruction definitions for correct parsing
1855 // in the ADLC because operands constitute user defined types which are used in
1856 // instruction definitions.
1857
1858 // This one generically applies only to EVEX, so there is only one version.
1859 operand vecZ() %{
1860 constraint(ALLOC_IN_RC(vectorz_reg));
1861 match(VecZ);
1862
1863 format %{ %}
1864 interface(REG_INTER);
1865 %}
1866
1867 operand rxmm0() %{
1868 constraint(ALLOC_IN_RC(xmm0_reg)); match(VecX);
1869 predicate((UseSSE > 0) && (UseAVX == 0)); format %{ %} interface(REG_INTER);
1870 %}
1871
1872 // Comparison Code for FP conditional move
1873 operand cmpOp_vcmppd() %{
1874 match(Bool);
1875
1876 predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
1877 n->as_Bool()->_test._test != BoolTest::no_overflow);
1878 format %{ "" %}
1879 interface(COND_INTER) %{
1880 equal (0x0, "eq");
1881 less (0x1, "lt");
1882 less_equal (0x2, "le");
1883 not_equal (0xC, "ne");
1884 greater_equal(0xD, "ge");
1885 greater (0xE, "gt");
1886 // TODO: adlc cannot compile this operand without the next two lines; it fails with:
1887 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
1888 // equal' for overflow.
1889 overflow (0x20, "o"); // not really supported by the instruction 1890 no_overflow (0x21, "no"); // not really supported by the instruction 1891 %} 1892 %} 1893 1894 1895 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1896 1897 // ============================================================================ 1898 1899 instruct ShouldNotReachHere() %{ 1900 match(Halt); 1901 format %{ "ud2\t# ShouldNotReachHere" %} 1902 ins_encode %{ 1903 __ ud2(); 1904 %} 1905 ins_pipe(pipe_slow); 1906 %} 1907 1908 // =================================EVEX special=============================== 1909 1910 instruct setMask(rRegI dst, rRegI src) %{ 1911 predicate(Matcher::has_predicated_vectors()); 1912 match(Set dst (SetVectMaskI src)); 1913 effect(TEMP dst); 1914 format %{ "setvectmask $dst, $src" %} 1915 ins_encode %{ 1916 __ setvectmask($dst$$Register, $src$$Register); 1917 %} 1918 ins_pipe(pipe_slow); 1919 %} 1920 1921 // ============================================================================ 1922 1923 instruct addF_reg(regF dst, regF src) %{ 1924 predicate((UseSSE>=1) && (UseAVX == 0)); 1925 match(Set dst (AddF dst src)); 1926 1927 format %{ "addss $dst, $src" %} 1928 ins_cost(150); 1929 ins_encode %{ 1930 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1931 %} 1932 ins_pipe(pipe_slow); 1933 %} 1934 1935 instruct addF_mem(regF dst, memory src) %{ 1936 predicate((UseSSE>=1) && (UseAVX == 0)); 1937 match(Set dst (AddF dst (LoadF src))); 1938 1939 format %{ "addss $dst, $src" %} 1940 ins_cost(150); 1941 ins_encode %{ 1942 __ addss($dst$$XMMRegister, $src$$Address); 1943 %} 1944 ins_pipe(pipe_slow); 1945 %} 1946 1947 instruct addF_imm(regF dst, immF con) %{ 1948 predicate((UseSSE>=1) && (UseAVX == 0)); 1949 match(Set dst (AddF dst con)); 1950 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1951 ins_cost(150); 1952 ins_encode %{ 1953 __ addss($dst$$XMMRegister, $constantaddress($con)); 1954 %} 1955 ins_pipe(pipe_slow); 1956 %} 1957 1958 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1959 predicate(UseAVX > 0); 1960 match(Set dst (AddF src1 src2)); 1961 1962 format %{ "vaddss $dst, $src1, $src2" %} 1963 ins_cost(150); 1964 ins_encode %{ 1965 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1966 %} 1967 ins_pipe(pipe_slow); 1968 %} 1969 1970 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1971 predicate(UseAVX > 0); 1972 match(Set dst (AddF src1 (LoadF src2))); 1973 1974 format %{ "vaddss $dst, $src1, $src2" %} 1975 ins_cost(150); 1976 ins_encode %{ 1977 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1978 %} 1979 ins_pipe(pipe_slow); 1980 %} 1981 1982 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1983 predicate(UseAVX > 0); 1984 match(Set dst (AddF src con)); 1985 1986 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1987 ins_cost(150); 1988 ins_encode %{ 1989 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1990 %} 1991 ins_pipe(pipe_slow); 1992 %} 1993 1994 instruct addD_reg(regD dst, regD src) %{ 1995 predicate((UseSSE>=2) && (UseAVX == 0)); 1996 match(Set dst (AddD dst src)); 1997 1998 format %{ "addsd $dst, $src" %} 1999 ins_cost(150); 2000 ins_encode %{ 2001 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2002 %} 2003 ins_pipe(pipe_slow); 2004 %} 2005 2006 instruct addD_mem(regD dst, memory src) %{ 2007 predicate((UseSSE>=2) && (UseAVX == 0)); 2008 match(Set dst (AddD dst (LoadD src))); 2009 2010 
format %{ "addsd $dst, $src" %} 2011 ins_cost(150); 2012 ins_encode %{ 2013 __ addsd($dst$$XMMRegister, $src$$Address); 2014 %} 2015 ins_pipe(pipe_slow); 2016 %} 2017 2018 instruct addD_imm(regD dst, immD con) %{ 2019 predicate((UseSSE>=2) && (UseAVX == 0)); 2020 match(Set dst (AddD dst con)); 2021 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2022 ins_cost(150); 2023 ins_encode %{ 2024 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2025 %} 2026 ins_pipe(pipe_slow); 2027 %} 2028 2029 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2030 predicate(UseAVX > 0); 2031 match(Set dst (AddD src1 src2)); 2032 2033 format %{ "vaddsd $dst, $src1, $src2" %} 2034 ins_cost(150); 2035 ins_encode %{ 2036 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2037 %} 2038 ins_pipe(pipe_slow); 2039 %} 2040 2041 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2042 predicate(UseAVX > 0); 2043 match(Set dst (AddD src1 (LoadD src2))); 2044 2045 format %{ "vaddsd $dst, $src1, $src2" %} 2046 ins_cost(150); 2047 ins_encode %{ 2048 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2049 %} 2050 ins_pipe(pipe_slow); 2051 %} 2052 2053 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2054 predicate(UseAVX > 0); 2055 match(Set dst (AddD src con)); 2056 2057 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2058 ins_cost(150); 2059 ins_encode %{ 2060 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2061 %} 2062 ins_pipe(pipe_slow); 2063 %} 2064 2065 instruct subF_reg(regF dst, regF src) %{ 2066 predicate((UseSSE>=1) && (UseAVX == 0)); 2067 match(Set dst (SubF dst src)); 2068 2069 format %{ "subss $dst, $src" %} 2070 ins_cost(150); 2071 ins_encode %{ 2072 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2073 %} 2074 ins_pipe(pipe_slow); 2075 %} 2076 2077 instruct subF_mem(regF dst, memory src) %{ 2078 predicate((UseSSE>=1) && (UseAVX == 0)); 2079 match(Set dst (SubF dst (LoadF src))); 2080 2081 format %{ "subss $dst, $src" %} 2082 ins_cost(150); 2083 ins_encode %{ 2084 __ subss($dst$$XMMRegister, $src$$Address); 2085 %} 2086 ins_pipe(pipe_slow); 2087 %} 2088 2089 instruct subF_imm(regF dst, immF con) %{ 2090 predicate((UseSSE>=1) && (UseAVX == 0)); 2091 match(Set dst (SubF dst con)); 2092 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2093 ins_cost(150); 2094 ins_encode %{ 2095 __ subss($dst$$XMMRegister, $constantaddress($con)); 2096 %} 2097 ins_pipe(pipe_slow); 2098 %} 2099 2100 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2101 predicate(UseAVX > 0); 2102 match(Set dst (SubF src1 src2)); 2103 2104 format %{ "vsubss $dst, $src1, $src2" %} 2105 ins_cost(150); 2106 ins_encode %{ 2107 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2108 %} 2109 ins_pipe(pipe_slow); 2110 %} 2111 2112 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2113 predicate(UseAVX > 0); 2114 match(Set dst (SubF src1 (LoadF src2))); 2115 2116 format %{ "vsubss $dst, $src1, $src2" %} 2117 ins_cost(150); 2118 ins_encode %{ 2119 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2120 %} 2121 ins_pipe(pipe_slow); 2122 %} 2123 2124 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2125 predicate(UseAVX > 0); 2126 match(Set dst (SubF src con)); 2127 2128 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2129 ins_cost(150); 2130 ins_encode %{ 2131 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2132 %} 2133 ins_pipe(pipe_slow); 2134 %} 2135 2136 instruct subD_reg(regD dst, regD src) %{ 2137 predicate((UseSSE>=2) && (UseAVX == 0)); 2138 match(Set dst (SubD dst src)); 2139 2140 format %{ "subsd $dst, $src" %} 2141 ins_cost(150); 2142 ins_encode %{ 2143 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2144 %} 2145 ins_pipe(pipe_slow); 2146 %} 2147 2148 instruct subD_mem(regD dst, memory src) %{ 2149 predicate((UseSSE>=2) && (UseAVX == 0)); 2150 match(Set dst (SubD dst (LoadD src))); 2151 2152 format %{ "subsd $dst, $src" %} 2153 ins_cost(150); 2154 ins_encode %{ 2155 __ subsd($dst$$XMMRegister, $src$$Address); 2156 %} 2157 ins_pipe(pipe_slow); 2158 %} 2159 2160 instruct subD_imm(regD dst, immD con) %{ 2161 predicate((UseSSE>=2) && (UseAVX == 0)); 2162 match(Set dst (SubD dst con)); 2163 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2164 ins_cost(150); 2165 ins_encode %{ 2166 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2167 %} 2168 ins_pipe(pipe_slow); 2169 %} 2170 2171 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2172 predicate(UseAVX > 0); 2173 match(Set dst (SubD src1 src2)); 2174 2175 format %{ "vsubsd $dst, $src1, $src2" %} 2176 ins_cost(150); 2177 ins_encode %{ 2178 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2179 %} 2180 ins_pipe(pipe_slow); 2181 %} 2182 2183 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2184 predicate(UseAVX > 0); 2185 match(Set dst (SubD src1 (LoadD src2))); 2186 2187 format %{ "vsubsd $dst, $src1, $src2" %} 2188 ins_cost(150); 2189 ins_encode %{ 2190 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2191 %} 2192 ins_pipe(pipe_slow); 2193 %} 2194 2195 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2196 predicate(UseAVX > 0); 2197 match(Set dst (SubD src con)); 2198 2199 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2200 ins_cost(150); 2201 ins_encode %{ 2202 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2203 %} 2204 ins_pipe(pipe_slow); 2205 %} 2206 2207 instruct mulF_reg(regF dst, regF src) %{ 2208 predicate((UseSSE>=1) && (UseAVX == 0)); 2209 match(Set dst (MulF dst src)); 2210 2211 format %{ "mulss $dst, $src" %} 2212 ins_cost(150); 2213 ins_encode %{ 2214 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2215 %} 2216 ins_pipe(pipe_slow); 2217 %} 2218 2219 instruct mulF_mem(regF dst, memory src) %{ 2220 predicate((UseSSE>=1) && (UseAVX == 0)); 2221 match(Set dst (MulF dst (LoadF src))); 2222 2223 format %{ "mulss $dst, $src" %} 2224 ins_cost(150); 2225 ins_encode %{ 2226 __ mulss($dst$$XMMRegister, $src$$Address); 2227 %} 2228 ins_pipe(pipe_slow); 2229 %} 2230 2231 instruct mulF_imm(regF dst, immF con) %{ 2232 predicate((UseSSE>=1) && (UseAVX == 0)); 2233 match(Set dst (MulF dst con)); 2234 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2235 ins_cost(150); 2236 ins_encode %{ 2237 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2238 %} 2239 ins_pipe(pipe_slow); 2240 %} 2241 2242 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2243 predicate(UseAVX > 0); 2244 match(Set dst (MulF src1 src2)); 2245 2246 format %{ "vmulss $dst, $src1, $src2" %} 2247 ins_cost(150); 2248 ins_encode %{ 2249 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2250 %} 2251 ins_pipe(pipe_slow); 2252 %} 2253 2254 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2255 predicate(UseAVX > 0); 2256 match(Set dst (MulF src1 (LoadF src2))); 2257 2258 format %{ "vmulss $dst, $src1, $src2" %} 2259 ins_cost(150); 2260 ins_encode %{ 2261 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2262 %} 2263 ins_pipe(pipe_slow); 2264 %} 2265 2266 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2267 predicate(UseAVX > 0); 2268 match(Set dst (MulF src con)); 2269 2270 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2271 ins_cost(150); 2272 ins_encode %{ 2273 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2274 %} 2275 ins_pipe(pipe_slow); 2276 %} 2277 2278 instruct mulD_reg(regD dst, regD src) %{ 2279 predicate((UseSSE>=2) && (UseAVX == 0)); 2280 match(Set dst (MulD dst src)); 2281 2282 format %{ "mulsd $dst, $src" %} 2283 ins_cost(150); 2284 ins_encode %{ 2285 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2286 %} 2287 ins_pipe(pipe_slow); 2288 %} 2289 2290 instruct mulD_mem(regD dst, memory src) %{ 2291 predicate((UseSSE>=2) && (UseAVX == 0)); 2292 match(Set dst (MulD dst (LoadD src))); 2293 2294 format %{ "mulsd $dst, $src" %} 2295 ins_cost(150); 2296 ins_encode %{ 2297 __ mulsd($dst$$XMMRegister, $src$$Address); 2298 %} 2299 ins_pipe(pipe_slow); 2300 %} 2301 2302 instruct mulD_imm(regD dst, immD con) %{ 2303 predicate((UseSSE>=2) && (UseAVX == 0)); 2304 match(Set dst (MulD dst con)); 2305 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2306 ins_cost(150); 2307 ins_encode %{ 2308 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2309 %} 2310 ins_pipe(pipe_slow); 2311 %} 2312 2313 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2314 predicate(UseAVX > 0); 2315 match(Set dst (MulD src1 src2)); 2316 2317 format %{ "vmulsd $dst, $src1, $src2" %} 2318 ins_cost(150); 2319 ins_encode %{ 2320 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2321 %} 2322 ins_pipe(pipe_slow); 2323 %} 2324 2325 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2326 predicate(UseAVX > 0); 2327 match(Set dst (MulD src1 (LoadD src2))); 2328 2329 format %{ "vmulsd $dst, $src1, $src2" %} 2330 ins_cost(150); 2331 ins_encode %{ 2332 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2333 %} 2334 ins_pipe(pipe_slow); 2335 %} 2336 2337 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2338 predicate(UseAVX > 0); 2339 match(Set dst (MulD src con)); 2340 2341 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2342 ins_cost(150); 2343 ins_encode %{ 2344 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2345 %} 2346 ins_pipe(pipe_slow); 2347 %} 2348 2349 instruct divF_reg(regF dst, regF src) %{ 2350 predicate((UseSSE>=1) && (UseAVX == 0)); 2351 match(Set dst (DivF dst src)); 2352 2353 format %{ "divss $dst, $src" %} 2354 ins_cost(150); 2355 ins_encode %{ 2356 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2357 %} 2358 ins_pipe(pipe_slow); 2359 %} 2360 2361 instruct divF_mem(regF dst, memory src) %{ 2362 predicate((UseSSE>=1) && (UseAVX == 0)); 2363 match(Set dst (DivF dst (LoadF src))); 2364 2365 format %{ "divss $dst, $src" %} 2366 ins_cost(150); 2367 ins_encode %{ 2368 __ divss($dst$$XMMRegister, $src$$Address); 2369 %} 2370 ins_pipe(pipe_slow); 2371 %} 2372 2373 instruct divF_imm(regF dst, immF con) %{ 2374 predicate((UseSSE>=1) && (UseAVX == 0)); 2375 match(Set dst (DivF dst con)); 2376 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2377 ins_cost(150); 2378 ins_encode %{ 2379 __ divss($dst$$XMMRegister, $constantaddress($con)); 2380 %} 2381 ins_pipe(pipe_slow); 2382 %} 2383 2384 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2385 predicate(UseAVX > 0); 2386 match(Set dst (DivF src1 src2)); 2387 2388 format %{ "vdivss $dst, $src1, $src2" %} 2389 ins_cost(150); 2390 ins_encode %{ 2391 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2392 %} 2393 ins_pipe(pipe_slow); 2394 %} 2395 2396 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2397 predicate(UseAVX > 0); 2398 match(Set dst (DivF src1 (LoadF src2))); 2399 2400 format %{ "vdivss $dst, $src1, $src2" %} 2401 ins_cost(150); 2402 ins_encode %{ 2403 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2404 %} 2405 ins_pipe(pipe_slow); 2406 %} 2407 2408 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2409 predicate(UseAVX > 0); 2410 match(Set dst (DivF src con)); 2411 2412 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2413 ins_cost(150); 2414 ins_encode %{ 2415 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2416 %} 2417 ins_pipe(pipe_slow); 2418 %} 2419 2420 instruct divD_reg(regD dst, regD src) %{ 2421 predicate((UseSSE>=2) && (UseAVX == 0)); 2422 match(Set dst (DivD dst src)); 2423 2424 format %{ "divsd $dst, $src" %} 2425 ins_cost(150); 2426 ins_encode %{ 2427 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2428 %} 2429 ins_pipe(pipe_slow); 2430 %} 2431 2432 instruct divD_mem(regD dst, memory src) %{ 2433 predicate((UseSSE>=2) && (UseAVX == 0)); 2434 match(Set dst (DivD dst (LoadD src))); 2435 2436 format %{ "divsd $dst, $src" %} 2437 ins_cost(150); 2438 ins_encode %{ 2439 __ divsd($dst$$XMMRegister, $src$$Address); 2440 %} 2441 ins_pipe(pipe_slow); 2442 %} 2443 2444 instruct divD_imm(regD dst, immD con) %{ 2445 predicate((UseSSE>=2) && (UseAVX == 0)); 2446 match(Set dst (DivD dst con)); 2447 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2448 ins_cost(150); 2449 ins_encode %{ 2450 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2451 %} 2452 ins_pipe(pipe_slow); 2453 %} 2454 2455 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2456 predicate(UseAVX > 0); 2457 match(Set dst (DivD src1 src2)); 2458 2459 format %{ "vdivsd $dst, $src1, $src2" %} 2460 ins_cost(150); 2461 ins_encode %{ 2462 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2463 %} 2464 ins_pipe(pipe_slow); 2465 %} 2466 2467 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2468 predicate(UseAVX > 0); 2469 match(Set dst (DivD src1 (LoadD src2))); 2470 2471 format %{ "vdivsd $dst, $src1, $src2" %} 2472 ins_cost(150); 2473 ins_encode %{ 2474 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2475 %} 2476 ins_pipe(pipe_slow); 2477 %} 2478 2479 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2480 predicate(UseAVX > 0); 2481 match(Set dst (DivD src con)); 2482 2483 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2484 ins_cost(150); 2485 ins_encode %{ 2486 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2487 %} 2488 ins_pipe(pipe_slow); 2489 %} 2490 2491 instruct absF_reg(regF dst) %{ 2492 predicate((UseSSE>=1) && (UseAVX == 0)); 2493 match(Set dst (AbsF dst)); 2494 ins_cost(150); 2495 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%}
2496 ins_encode %{
2497 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
2498 %}
2499 ins_pipe(pipe_slow);
2500 %}
2501
2502 instruct absF_reg_reg(regF dst, regF src) %{
2503 predicate(VM_Version::supports_avxonly());
2504 match(Set dst (AbsF src));
2505 ins_cost(150);
2506 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2507 ins_encode %{
2508 int vector_len = 0;
2509 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2510 ExternalAddress(float_signmask()), vector_len);
2511 %}
2512 ins_pipe(pipe_slow);
2513 %}
2514
2515 #ifdef _LP64
2516 instruct absF_reg_reg_evex(regF dst, regF src) %{
2517 predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
2518 match(Set dst (AbsF src));
2519 ins_cost(150);
2520 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2521 ins_encode %{
2522 int vector_len = 0;
2523 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2524 ExternalAddress(float_signmask()), vector_len);
2525 %}
2526 ins_pipe(pipe_slow);
2527 %}
2528
2529 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
2530 predicate(VM_Version::supports_avx512novl());
2531 match(Set dst (AbsF src1));
2532 effect(TEMP src2);
2533 ins_cost(150);
2534 format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
2535 ins_encode %{
2536 int vector_len = 0;
2537 __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
2538 ExternalAddress(float_signmask()), vector_len);
2539 %}
2540 ins_pipe(pipe_slow);
2541 %}
2542 #else // _LP64
2543 instruct absF_reg_reg_evex(regF dst, regF src) %{
2544 predicate(UseAVX > 2);
2545 match(Set dst (AbsF src));
2546 ins_cost(150);
2547 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
2548 ins_encode %{
2549 int vector_len = 0;
2550 __ vandps($dst$$XMMRegister, $src$$XMMRegister,
2551 ExternalAddress(float_signmask()), vector_len);
2552 %}
2553 ins_pipe(pipe_slow);
2554 %}
2555 #endif
2556
2557 instruct absD_reg(regD dst) %{
2558 predicate((UseSSE>=2) && (UseAVX == 0));
2559 match(Set dst (AbsD dst));
2560 ins_cost(150);
2561 format %{ "andpd $dst, [0x7fffffffffffffff]\t"
2562 "# abs double by sign masking" %}
2563 ins_encode %{
2564 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
2565 %}
2566 ins_pipe(pipe_slow);
2567 %}
2568
2569 instruct absD_reg_reg(regD dst, regD src) %{
2570 predicate(VM_Version::supports_avxonly());
2571 match(Set dst (AbsD src));
2572 ins_cost(150);
2573 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
2574 "# abs double by sign masking" %}
2575 ins_encode %{
2576 int vector_len = 0;
2577 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2578 ExternalAddress(double_signmask()), vector_len);
2579 %}
2580 ins_pipe(pipe_slow);
2581 %}
2582
2583 #ifdef _LP64
2584 instruct absD_reg_reg_evex(regD dst, regD src) %{
2585 predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
2586 match(Set dst (AbsD src));
2587 ins_cost(150);
2588 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
2589 "# abs double by sign masking" %}
2590 ins_encode %{
2591 int vector_len = 0;
2592 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2593 ExternalAddress(double_signmask()), vector_len);
2594 %}
2595 ins_pipe(pipe_slow);
2596 %}
2597
2598 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
2599 predicate(VM_Version::supports_avx512novl());
2600 match(Set dst (AbsD src1));
2601 effect(TEMP src2);
2602 ins_cost(150);
2603 format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
2604 ins_encode %{
2605 int vector_len = 0;
2606 __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
2607 ExternalAddress(double_signmask()), vector_len);
2608 %}
2609 ins_pipe(pipe_slow);
2610 %}
2611 #else // _LP64
2612 instruct absD_reg_reg_evex(regD dst, regD src) %{
2613 predicate(UseAVX > 2);
2614 match(Set dst (AbsD src));
2615 ins_cost(150);
2616 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
2617 "# abs double by sign masking" %}
2618 ins_encode %{
2619 int vector_len = 0;
2620 __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
2621 ExternalAddress(double_signmask()), vector_len);
2622 %}
2623 ins_pipe(pipe_slow);
2624 %}
2625 #endif
2626
2627 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
2628 %{
2629 match(Set dst (NegI dst));
2630 effect(KILL cr);
2631
2632 format %{ "negl $dst\t# int" %}
2633 ins_encode %{
2634 __ negl($dst$$Register);
2635 %}
2636 ins_pipe(ialu_reg);
2637 %}
2638
2639 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
2640 %{
2641 match(Set dst (NegL dst));
2642 effect(KILL cr);
2643
2644 format %{ "negq $dst\t# long" %}
2645 ins_encode %{
2646 __ negq($dst$$Register);
2647 %}
2648 ins_pipe(ialu_reg);
2649 %}
2650
2651 instruct negF_reg(regF dst) %{
2652 predicate((UseSSE>=1) && (UseAVX == 0));
2653 match(Set dst (NegF dst));
2654 ins_cost(150);
2655 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
2656 ins_encode %{
2657 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
2658 %}
2659 ins_pipe(pipe_slow);
2660 %}
2661
2662 instruct negF_reg_reg(regF dst, regF src) %{
2663 predicate(UseAVX > 0);
2664 match(Set dst (NegF src));
2665 ins_cost(150);
2666 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
2667 ins_encode %{
2668 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
2669 ExternalAddress(float_signflip()));
2670 %}
2671 ins_pipe(pipe_slow);
2672 %}
2673
2674 instruct negD_reg(regD dst) %{
2675 predicate((UseSSE>=2) && (UseAVX == 0));
2676 match(Set dst (NegD dst));
2677 ins_cost(150);
2678 format %{ "xorpd $dst, [0x8000000000000000]\t"
2679 "# neg double by sign flipping" %}
2680 ins_encode %{
2681 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
2682 %}
2683 ins_pipe(pipe_slow);
2684 %}
2685
2686 instruct negD_reg_reg(regD dst, regD src) %{
2687 predicate(UseAVX > 0);
2688 match(Set dst (NegD src));
2689 ins_cost(150);
2690 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
2691 "# neg double by sign flipping" %}
2692 ins_encode %{
2693 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
2694 ExternalAddress(double_signflip()));
2695 %}
2696 ins_pipe(pipe_slow);
2697 %}
2698
2699 instruct sqrtF_reg(regF dst, regF src) %{
2700 predicate(UseSSE>=1);
2701 match(Set dst (SqrtF src));
2702
2703 format %{ "sqrtss $dst, $src" %}
2704 ins_cost(150);
2705 ins_encode %{
2706 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
2707 %}
2708 ins_pipe(pipe_slow);
2709 %}
2710
2711 instruct sqrtF_mem(regF dst, memory src) %{
2712 predicate(UseSSE>=1);
2713 match(Set dst (SqrtF (LoadF src)));
2714
2715 format %{ "sqrtss $dst, $src" %}
2716 ins_cost(150);
2717 ins_encode %{
2718 __ sqrtss($dst$$XMMRegister, $src$$Address);
2719 %}
2720 ins_pipe(pipe_slow);
2721 %}
2722
2723 instruct sqrtF_imm(regF dst, immF con) %{
2724 predicate(UseSSE>=1);
2725 match(Set dst (SqrtF con));
2726
2727 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
2728 ins_cost(150);
2729 ins_encode %{
2730 __ sqrtss($dst$$XMMRegister,
$constantaddress($con)); 2731 %} 2732 ins_pipe(pipe_slow); 2733 %} 2734 2735 instruct sqrtD_reg(regD dst, regD src) %{ 2736 predicate(UseSSE>=2); 2737 match(Set dst (SqrtD src)); 2738 2739 format %{ "sqrtsd $dst, $src" %} 2740 ins_cost(150); 2741 ins_encode %{ 2742 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 2743 %} 2744 ins_pipe(pipe_slow); 2745 %} 2746 2747 instruct sqrtD_mem(regD dst, memory src) %{ 2748 predicate(UseSSE>=2); 2749 match(Set dst (SqrtD (LoadD src))); 2750 2751 format %{ "sqrtsd $dst, $src" %} 2752 ins_cost(150); 2753 ins_encode %{ 2754 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2755 %} 2756 ins_pipe(pipe_slow); 2757 %} 2758 2759 instruct sqrtD_imm(regD dst, immD con) %{ 2760 predicate(UseSSE>=2); 2761 match(Set dst (SqrtD con)); 2762 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2763 ins_cost(150); 2764 ins_encode %{ 2765 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2766 %} 2767 ins_pipe(pipe_slow); 2768 %} 2769 2770 instruct onspinwait() %{ 2771 match(OnSpinWait); 2772 ins_cost(200); 2773 2774 format %{ 2775 $$template 2776 if (os::is_MP()) { 2777 $$emit$$"pause\t! membar_onspinwait" 2778 } else { 2779 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2780 } 2781 %} 2782 ins_encode %{ 2783 __ pause(); 2784 %} 2785 ins_pipe(pipe_slow); 2786 %} 2787 2788 // a * b + c 2789 instruct fmaD_reg(regD a, regD b, regD c) %{ 2790 predicate(UseFMA); 2791 match(Set c (FmaD c (Binary a b))); 2792 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2793 ins_cost(150); 2794 ins_encode %{ 2795 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2796 %} 2797 ins_pipe( pipe_slow ); 2798 %} 2799 2800 // a * b + c 2801 instruct fmaF_reg(regF a, regF b, regF c) %{ 2802 predicate(UseFMA); 2803 match(Set c (FmaF c (Binary a b))); 2804 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2805 ins_cost(150); 2806 ins_encode %{ 2807 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2808 %} 2809 ins_pipe( pipe_slow ); 2810 %} 2811 2812 // ====================VECTOR INSTRUCTIONS===================================== 2813 2814 instruct reinterpretS(vecS dst) %{ 2815 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2816 match(Set dst (VectorReinterpret dst)); 2817 ins_cost(125); 2818 format %{ " # reinterpret $dst" %} 2819 ins_encode %{ 2820 // empty 2821 %} 2822 ins_pipe( pipe_slow ); 2823 %} 2824 2825 instruct reinterpretS2D(vecD dst, vecS src) %{ 2826 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2827 match(Set dst (VectorReinterpret src)); 2828 ins_cost(125); 2829 effect(TEMP dst); 2830 format %{ " # reinterpret $dst,$src" %} 2831 ins_encode %{ 2832 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2833 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2834 %} 2835 ins_pipe( pipe_slow ); 2836 %} 2837 2838 instruct reinterpretS2X(vecX dst, vecS src) %{ 2839 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2840 match(Set dst (VectorReinterpret src)); 2841 ins_cost(125); 2842 effect(TEMP dst); 2843 format %{ " # reinterpret $dst,$src" %} 2844 ins_encode %{ 2845 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2846 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2847 %} 2848 ins_pipe( pipe_slow ); 2849 %} 2850 2851 instruct reinterpretS2Y(vecY dst, vecS src) %{ 2852 
predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2853 match(Set dst (VectorReinterpret src)); 2854 ins_cost(125); 2855 effect(TEMP dst); 2856 format %{ " # reinterpret $dst,$src" %} 2857 ins_encode %{ 2858 int vector_len = 1; 2859 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2860 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2861 %} 2862 ins_pipe( pipe_slow ); 2863 %} 2864 2865 instruct reinterpretS2Z(vecZ dst, vecS src) %{ 2866 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4); 2867 match(Set dst (VectorReinterpret src)); 2868 ins_cost(125); 2869 effect(TEMP dst); 2870 format %{ " # reinterpret $dst,$src" %} 2871 ins_encode %{ 2872 int vector_len = 2; 2873 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2874 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2875 %} 2876 ins_pipe( pipe_slow ); 2877 %} 2878 2879 instruct reinterpretD2S(vecS dst, vecD src) %{ 2880 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2881 match(Set dst (VectorReinterpret src)); 2882 ins_cost(125); 2883 format %{ " # reinterpret $dst,$src" %} 2884 ins_encode %{ 2885 // If register is the same, then move is not needed. 2886 if ($dst$$XMMRegister != $src$$XMMRegister) { 2887 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2888 } 2889 %} 2890 ins_pipe( pipe_slow ); 2891 %} 2892 2893 instruct reinterpretD(vecD dst) %{ 2894 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2895 match(Set dst (VectorReinterpret dst)); 2896 ins_cost(125); 2897 format %{ " # reinterpret $dst" %} 2898 ins_encode %{ 2899 // empty 2900 %} 2901 ins_pipe( pipe_slow ); 2902 %} 2903 2904 instruct reinterpretD2X(vecX dst, vecD src) %{ 2905 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2906 match(Set dst (VectorReinterpret src)); 2907 ins_cost(125); 2908 effect(TEMP dst); 2909 format %{ " # reinterpret $dst,$src" %} 2910 ins_encode %{ 2911 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2912 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 2913 %} 2914 ins_pipe( pipe_slow ); 2915 %} 2916 2917 instruct reinterpretD2Y(vecY dst, vecD src) %{ 2918 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2919 match(Set dst (VectorReinterpret src)); 2920 ins_cost(125); 2921 effect(TEMP dst); 2922 format %{ " # reinterpret $dst,$src" %} 2923 ins_encode %{ 2924 int vector_len = 1; 2925 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2926 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2927 %} 2928 ins_pipe( pipe_slow ); 2929 %} 2930 2931 instruct reinterpretD2Z(vecZ dst, vecD src) %{ 2932 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 2933 match(Set dst (VectorReinterpret src)); 2934 ins_cost(125); 2935 effect(TEMP dst); 2936 format %{ " # reinterpret $dst,$src" %} 2937 ins_encode %{ 2938 int vector_len = 2; 2939 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 2940 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 2941 %} 2942 ins_pipe( pipe_slow ); 2943 %} 2944 2945 instruct reinterpretX2S(vecS 
instruct reinterpretX2S(vecS dst, vecX src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretX2D(vecD dst, vecX src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretX(vecX dst) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretX2Y(vecY dst, vecX src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretX2Z(vecZ dst, vecX src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretY2S(vecS dst, vecY src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretY2D(vecD dst, vecY src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretY2X(vecX dst, vecY src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretY(vecY dst) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretY2Z(vecZ dst, vecY src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretZ2S(vecS dst, vecZ src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretZ2D(vecD dst, vecZ src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretZ2X(vecX dst, vecZ src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretZ2Y(vecY dst, vecZ src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If the registers are the same, the move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpretZ(vecZ dst) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64);
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

// ==========

// Load vectors (1 byte long)
instruct loadV1(vecS dst, memory mem, rRegI tmp) %{
  predicate(n->as_LoadVector()->memory_size() == 1);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  effect(TEMP tmp);
  format %{ "movzbl $tmp,$mem\n\t"
            "movd $dst,$tmp\t! load vector (1 byte)" %}
  ins_encode %{
    __ movzbl($tmp$$Register, $mem$$Address);
    __ movdl($dst$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (2 bytes long)
instruct loadV2(vecS dst, memory mem, rRegI tmp) %{
  predicate(n->as_LoadVector()->memory_size() == 2);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  effect(TEMP tmp);
  format %{ "movzwl $tmp,$mem\n\t"
            "movd $dst,$tmp\t! load vector (2 bytes)" %}
  ins_encode %{
    __ movzwl($tmp$$Register, $mem$$Address);
    __ movdl($dst$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
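// There is no 1- or 2-byte XMM load, so the smallest vectors bounce through
// a general-purpose register: a zero-extending scalar load followed by a
// GPR->XMM move. Intrinsics sketch of the loadV1 sequence (illustrative
// only):
//
//   #include <immintrin.h>
//   #include <cstdint>
//
//   __m128i load_vec1(const uint8_t* p) {
//     uint32_t tmp = *p;                  // movzbl: zero-extending byte load
//     return _mm_cvtsi32_si128((int)tmp); // movd: move GPR into XMM
//   }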
// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
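// The 64-byte loads split on element size because the EVEX move exists in a
// doubleword flavor (evmovdqul) and a quadword flavor (evmovdquq). With no
// mask (k0) both move the same 512 bits; the flavor matters because it fixes
// the lane width a predicate mask would apply to. Intrinsics sketch
// (illustrative only):
//
//   #include <immintrin.h>
//
//   __m512i load_dword_lanes(const void* p, __mmask16 m) {
//     return _mm512_maskz_loadu_epi32(m, p);  // 16 x 32-bit lanes
//   }
//   __m512i load_qword_lanes(const void* p, __mmask8 m) {
//     return _mm512_maskz_loadu_epi64(m, p);  //  8 x 64-bit lanes
//   }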
// Store vectors
instruct storeV1(memory mem, vecS src, rRegI tmp) %{
  predicate(n->as_StoreVector()->memory_size() == 1);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  effect(TEMP tmp);
  format %{ "movd $tmp,$src\n\t"
            "movb $mem,$tmp\t! store vector (1 byte)" %}
  ins_encode %{
    __ movdl($tmp$$Register, $src$$XMMRegister);
    __ movb($mem$$Address, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV2(memory mem, vecS src, rRegI tmp) %{
  predicate(n->as_StoreVector()->memory_size() == 2);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  effect(TEMP tmp);
  format %{ "movd $tmp,$src\n\t"
            "movw $mem,$tmp\t! store vector (2 bytes)" %}
  ins_encode %{
    __ movdl($tmp$$Register, $src$$XMMRegister);
    __ movw($mem$$Address, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE=======================================
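// The legacy replicate patterns below synthesize a broadcast from shuffles:
// move the scalar into an XMM register, duplicate it within a quadword, then
// duplicate quadwords. Intrinsics sketch of the Repl16B sequence
// (illustrative only):
//
//   #include <immintrin.h>
//
//   __m128i replicate16b(int value) {
//     __m128i v = _mm_cvtsi32_si128(value); // movd
//     v = _mm_unpacklo_epi8(v, v);          // punpcklbw: b -> bb
//     v = _mm_shufflelo_epi16(v, 0x00);     // pshuflw: bb -> bbbbbbbb
//     return _mm_unpacklo_epi64(v, v);      // punpcklqdq: fill all 16 bytes
//   }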
replicate16B" %} 3397 ins_encode %{ 3398 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3399 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3400 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3401 %} 3402 ins_pipe( pipe_slow ); 3403 %} 3404 3405 instruct Repl32B(vecY dst, rRegI src) %{ 3406 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3407 match(Set dst (ReplicateB src)); 3408 format %{ "movd $dst,$src\n\t" 3409 "punpcklbw $dst,$dst\n\t" 3410 "pshuflw $dst,$dst,0x00\n\t" 3411 "punpcklqdq $dst,$dst\n\t" 3412 "vinserti128_high $dst,$dst\t! replicate32B" %} 3413 ins_encode %{ 3414 __ movdl($dst$$XMMRegister, $src$$Register); 3415 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3416 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3417 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3418 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3419 %} 3420 ins_pipe( pipe_slow ); 3421 %} 3422 3423 instruct Repl32B_mem(vecY dst, memory mem) %{ 3424 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3425 match(Set dst (ReplicateB (LoadB mem))); 3426 format %{ "punpcklbw $dst,$mem\n\t" 3427 "pshuflw $dst,$dst,0x00\n\t" 3428 "punpcklqdq $dst,$dst\n\t" 3429 "vinserti128_high $dst,$dst\t! replicate32B" %} 3430 ins_encode %{ 3431 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3432 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3433 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3434 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3435 %} 3436 ins_pipe( pipe_slow ); 3437 %} 3438 3439 instruct Repl16B_imm(vecX dst, immI con) %{ 3440 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3441 match(Set dst (ReplicateB con)); 3442 format %{ "movq $dst,[$constantaddress]\n\t" 3443 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3444 ins_encode %{ 3445 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3446 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3447 %} 3448 ins_pipe( pipe_slow ); 3449 %} 3450 3451 instruct Repl32B_imm(vecY dst, immI con) %{ 3452 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3453 match(Set dst (ReplicateB con)); 3454 format %{ "movq $dst,[$constantaddress]\n\t" 3455 "punpcklqdq $dst,$dst\n\t" 3456 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3457 ins_encode %{ 3458 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3459 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3460 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3461 %} 3462 ins_pipe( pipe_slow ); 3463 %} 3464 3465 instruct Repl4S(vecD dst, rRegI src) %{ 3466 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3467 match(Set dst (ReplicateS src)); 3468 format %{ "movd $dst,$src\n\t" 3469 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3470 ins_encode %{ 3471 __ movdl($dst$$XMMRegister, $src$$Register); 3472 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3473 %} 3474 ins_pipe( pipe_slow ); 3475 %} 3476 3477 instruct Repl4S_mem(vecD dst, memory mem) %{ 3478 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3479 match(Set dst (ReplicateS (LoadS mem))); 3480 format %{ "pshuflw $dst,$mem,0x00\t! 
replicate4S" %} 3481 ins_encode %{ 3482 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3483 %} 3484 ins_pipe( pipe_slow ); 3485 %} 3486 3487 instruct Repl8S(vecX dst, rRegI src) %{ 3488 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3489 match(Set dst (ReplicateS src)); 3490 format %{ "movd $dst,$src\n\t" 3491 "pshuflw $dst,$dst,0x00\n\t" 3492 "punpcklqdq $dst,$dst\t! replicate8S" %} 3493 ins_encode %{ 3494 __ movdl($dst$$XMMRegister, $src$$Register); 3495 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3496 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3497 %} 3498 ins_pipe( pipe_slow ); 3499 %} 3500 3501 instruct Repl8S_mem(vecX dst, memory mem) %{ 3502 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3503 match(Set dst (ReplicateS (LoadS mem))); 3504 format %{ "pshuflw $dst,$mem,0x00\n\t" 3505 "punpcklqdq $dst,$dst\t! replicate8S" %} 3506 ins_encode %{ 3507 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3508 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3509 %} 3510 ins_pipe( pipe_slow ); 3511 %} 3512 3513 instruct Repl8S_imm(vecX dst, immI con) %{ 3514 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3515 match(Set dst (ReplicateS con)); 3516 format %{ "movq $dst,[$constantaddress]\n\t" 3517 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3518 ins_encode %{ 3519 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3520 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3521 %} 3522 ins_pipe( pipe_slow ); 3523 %} 3524 3525 instruct Repl16S(vecY dst, rRegI src) %{ 3526 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3527 match(Set dst (ReplicateS src)); 3528 format %{ "movd $dst,$src\n\t" 3529 "pshuflw $dst,$dst,0x00\n\t" 3530 "punpcklqdq $dst,$dst\n\t" 3531 "vinserti128_high $dst,$dst\t! replicate16S" %} 3532 ins_encode %{ 3533 __ movdl($dst$$XMMRegister, $src$$Register); 3534 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3535 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3536 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3537 %} 3538 ins_pipe( pipe_slow ); 3539 %} 3540 3541 instruct Repl16S_mem(vecY dst, memory mem) %{ 3542 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3543 match(Set dst (ReplicateS (LoadS mem))); 3544 format %{ "pshuflw $dst,$mem,0x00\n\t" 3545 "punpcklqdq $dst,$dst\n\t" 3546 "vinserti128_high $dst,$dst\t! replicate16S" %} 3547 ins_encode %{ 3548 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3549 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3550 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3551 %} 3552 ins_pipe( pipe_slow ); 3553 %} 3554 3555 instruct Repl16S_imm(vecY dst, immI con) %{ 3556 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3557 match(Set dst (ReplicateS con)); 3558 format %{ "movq $dst,[$constantaddress]\n\t" 3559 "punpcklqdq $dst,$dst\n\t" 3560 "vinserti128_high $dst,$dst\t! 
instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
replicate2L" %} 3649 ins_encode %{ 3650 __ movq($dst$$XMMRegister, $mem$$Address); 3651 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3652 %} 3653 ins_pipe( pipe_slow ); 3654 %} 3655 3656 // Replicate long (8 byte) scalar to be vector 3657 #ifdef _LP64 3658 instruct Repl4L(vecY dst, rRegL src) %{ 3659 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3660 match(Set dst (ReplicateL src)); 3661 format %{ "movdq $dst,$src\n\t" 3662 "punpcklqdq $dst,$dst\n\t" 3663 "vinserti128_high $dst,$dst\t! replicate4L" %} 3664 ins_encode %{ 3665 __ movdq($dst$$XMMRegister, $src$$Register); 3666 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3667 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3668 %} 3669 ins_pipe( pipe_slow ); 3670 %} 3671 #else // _LP64 3672 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3673 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3674 match(Set dst (ReplicateL src)); 3675 effect(TEMP dst, USE src, TEMP tmp); 3676 format %{ "movdl $dst,$src.lo\n\t" 3677 "movdl $tmp,$src.hi\n\t" 3678 "punpckldq $dst,$tmp\n\t" 3679 "punpcklqdq $dst,$dst\n\t" 3680 "vinserti128_high $dst,$dst\t! replicate4L" %} 3681 ins_encode %{ 3682 __ movdl($dst$$XMMRegister, $src$$Register); 3683 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3684 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3685 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3686 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3687 %} 3688 ins_pipe( pipe_slow ); 3689 %} 3690 #endif // _LP64 3691 3692 instruct Repl4L_imm(vecY dst, immL con) %{ 3693 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3694 match(Set dst (ReplicateL con)); 3695 format %{ "movq $dst,[$constantaddress]\n\t" 3696 "punpcklqdq $dst,$dst\n\t" 3697 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3698 ins_encode %{ 3699 __ movq($dst$$XMMRegister, $constantaddress($con)); 3700 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3701 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3702 %} 3703 ins_pipe( pipe_slow ); 3704 %} 3705 3706 instruct Repl4L_mem(vecY dst, memory mem) %{ 3707 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3708 match(Set dst (ReplicateL (LoadL mem))); 3709 format %{ "movq $dst,$mem\n\t" 3710 "punpcklqdq $dst,$dst\n\t" 3711 "vinserti128_high $dst,$dst\t! replicate4L" %} 3712 ins_encode %{ 3713 __ movq($dst$$XMMRegister, $mem$$Address); 3714 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3715 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3716 %} 3717 ins_pipe( pipe_slow ); 3718 %} 3719 3720 instruct Repl2F_mem(vecD dst, memory mem) %{ 3721 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3722 match(Set dst (ReplicateF (LoadF mem))); 3723 format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} 3724 ins_encode %{ 3725 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3726 %} 3727 ins_pipe( pipe_slow ); 3728 %} 3729 3730 instruct Repl4F_mem(vecX dst, memory mem) %{ 3731 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3732 match(Set dst (ReplicateF (LoadF mem))); 3733 format %{ "pshufd $dst,$mem,0x00\t! 
replicate4F" %} 3734 ins_encode %{ 3735 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3736 %} 3737 ins_pipe( pipe_slow ); 3738 %} 3739 3740 instruct Repl8F(vecY dst, regF src) %{ 3741 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3742 match(Set dst (ReplicateF src)); 3743 format %{ "pshufd $dst,$src,0x00\n\t" 3744 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3745 ins_encode %{ 3746 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 3747 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3748 %} 3749 ins_pipe( pipe_slow ); 3750 %} 3751 3752 instruct Repl8F_mem(vecY dst, memory mem) %{ 3753 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3754 match(Set dst (ReplicateF (LoadF mem))); 3755 format %{ "pshufd $dst,$mem,0x00\n\t" 3756 "vinsertf128_high $dst,$dst\t! replicate8F" %} 3757 ins_encode %{ 3758 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3759 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3760 %} 3761 ins_pipe( pipe_slow ); 3762 %} 3763 3764 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 3765 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3766 match(Set dst (ReplicateF zero)); 3767 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 3768 ins_encode %{ 3769 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3770 %} 3771 ins_pipe( fpu_reg_reg ); 3772 %} 3773 3774 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 3775 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3776 match(Set dst (ReplicateF zero)); 3777 format %{ "xorps $dst,$dst\t! replicate4F zero" %} 3778 ins_encode %{ 3779 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 3780 %} 3781 ins_pipe( fpu_reg_reg ); 3782 %} 3783 3784 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 3785 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 3786 match(Set dst (ReplicateF zero)); 3787 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 3788 ins_encode %{ 3789 int vector_len = 1; 3790 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3791 %} 3792 ins_pipe( fpu_reg_reg ); 3793 %} 3794 3795 instruct Repl2D_mem(vecX dst, memory mem) %{ 3796 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3797 match(Set dst (ReplicateD (LoadD mem))); 3798 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3799 ins_encode %{ 3800 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3801 %} 3802 ins_pipe( pipe_slow ); 3803 %} 3804 3805 instruct Repl4D(vecY dst, regD src) %{ 3806 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3807 match(Set dst (ReplicateD src)); 3808 format %{ "pshufd $dst,$src,0x44\n\t" 3809 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3810 ins_encode %{ 3811 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3812 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3813 %} 3814 ins_pipe( pipe_slow ); 3815 %} 3816 3817 instruct Repl4D_mem(vecY dst, memory mem) %{ 3818 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3819 match(Set dst (ReplicateD (LoadD mem))); 3820 format %{ "pshufd $dst,$mem,0x44\n\t" 3821 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 3822 ins_encode %{ 3823 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3824 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3825 %} 3826 ins_pipe( pipe_slow ); 3827 %} 3828 3829 // Replicate double (8 byte) scalar zero to be vector 3830 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3831 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3832 match(Set dst (ReplicateD zero)); 3833 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3834 ins_encode %{ 3835 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3836 %} 3837 ins_pipe( fpu_reg_reg ); 3838 %} 3839 3840 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3841 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3842 match(Set dst (ReplicateD zero)); 3843 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3844 ins_encode %{ 3845 int vector_len = 1; 3846 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3847 %} 3848 ins_pipe( fpu_reg_reg ); 3849 %} 3850 3851 // ====================GENERIC REPLICATE========================================== 3852 3853 // Replicate byte scalar to be vector 3854 instruct Repl4B(vecS dst, rRegI src) %{ 3855 predicate(n->as_Vector()->length() == 4); 3856 match(Set dst (ReplicateB src)); 3857 format %{ "movd $dst,$src\n\t" 3858 "punpcklbw $dst,$dst\n\t" 3859 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3860 ins_encode %{ 3861 __ movdl($dst$$XMMRegister, $src$$Register); 3862 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3863 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3864 %} 3865 ins_pipe( pipe_slow ); 3866 %} 3867 3868 instruct Repl8B(vecD dst, rRegI src) %{ 3869 predicate(n->as_Vector()->length() == 8); 3870 match(Set dst (ReplicateB src)); 3871 format %{ "movd $dst,$src\n\t" 3872 "punpcklbw $dst,$dst\n\t" 3873 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3874 ins_encode %{ 3875 __ movdl($dst$$XMMRegister, $src$$Register); 3876 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3877 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3878 %} 3879 ins_pipe( pipe_slow ); 3880 %} 3881 3882 // Replicate byte scalar immediate to be vector by loading from const table. 3883 instruct Repl4B_imm(vecS dst, immI con) %{ 3884 predicate(n->as_Vector()->length() == 4); 3885 match(Set dst (ReplicateB con)); 3886 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 3887 ins_encode %{ 3888 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 3889 %} 3890 ins_pipe( pipe_slow ); 3891 %} 3892 3893 instruct Repl8B_imm(vecD dst, immI con) %{ 3894 predicate(n->as_Vector()->length() == 8); 3895 match(Set dst (ReplicateB con)); 3896 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 3897 ins_encode %{ 3898 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3899 %} 3900 ins_pipe( pipe_slow ); 3901 %} 3902 3903 // Replicate byte scalar zero to be vector 3904 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 3905 predicate(n->as_Vector()->length() == 4); 3906 match(Set dst (ReplicateB zero)); 3907 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 3908 ins_encode %{ 3909 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3910 %} 3911 ins_pipe( fpu_reg_reg ); 3912 %} 3913 3914 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 3915 predicate(n->as_Vector()->length() == 8); 3916 match(Set dst (ReplicateB zero)); 3917 format %{ "pxor $dst,$dst\t! 
// ====================GENERIC REPLICATE==========================================

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; plain AVX only provides vxorps/vxorpd at this width.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; plain AVX only provides vxorps/vxorpd at this width.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// An integer can be loaded into an XMM register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // 256-bit vpxor requires AVX2; plain AVX only provides vxorps/vxorpd at this width.
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl2L(vecX dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
replicate2L"%} 4115 ins_encode %{ 4116 __ movdl($dst$$XMMRegister, $src$$Register); 4117 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4118 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4119 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4120 %} 4121 ins_pipe( pipe_slow ); 4122 %} 4123 #endif // _LP64 4124 4125 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4126 instruct Repl2L_imm(vecX dst, immL con) %{ 4127 predicate(n->as_Vector()->length() == 2); 4128 match(Set dst (ReplicateL con)); 4129 format %{ "movq $dst,[$constantaddress]\n\t" 4130 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 4131 ins_encode %{ 4132 __ movq($dst$$XMMRegister, $constantaddress($con)); 4133 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4134 %} 4135 ins_pipe( pipe_slow ); 4136 %} 4137 4138 // Replicate long (8 byte) scalar zero to be vector 4139 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 4140 predicate(n->as_Vector()->length() == 2); 4141 match(Set dst (ReplicateL zero)); 4142 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 4143 ins_encode %{ 4144 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4145 %} 4146 ins_pipe( fpu_reg_reg ); 4147 %} 4148 4149 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 4150 predicate(n->as_Vector()->length() == 4); 4151 match(Set dst (ReplicateL zero)); 4152 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 4153 ins_encode %{ 4154 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4155 int vector_len = 1; 4156 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4157 %} 4158 ins_pipe( fpu_reg_reg ); 4159 %} 4160 4161 // Replicate float (4 byte) scalar to be vector 4162 instruct Repl2F(vecD dst, regF src) %{ 4163 predicate(n->as_Vector()->length() == 2); 4164 match(Set dst (ReplicateF src)); 4165 format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} 4166 ins_encode %{ 4167 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4168 %} 4169 ins_pipe( fpu_reg_reg ); 4170 %} 4171 4172 instruct Repl4F(vecX dst, regF src) %{ 4173 predicate(n->as_Vector()->length() == 4); 4174 match(Set dst (ReplicateF src)); 4175 format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} 4176 ins_encode %{ 4177 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4178 %} 4179 ins_pipe( pipe_slow ); 4180 %} 4181 4182 // Replicate double (8 bytes) scalar to be vector 4183 instruct Repl2D(vecX dst, regD src) %{ 4184 predicate(n->as_Vector()->length() == 2); 4185 match(Set dst (ReplicateD src)); 4186 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 4187 ins_encode %{ 4188 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4189 %} 4190 ins_pipe( pipe_slow ); 4191 %} 4192 4193 // ====================EVEX REPLICATE============================================= 4194 4195 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4196 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4197 match(Set dst (ReplicateB (LoadB mem))); 4198 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4199 ins_encode %{ 4200 int vector_len = 0; 4201 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4202 %} 4203 ins_pipe( pipe_slow ); 4204 %} 4205 4206 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4207 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4208 match(Set dst (ReplicateB (LoadB mem))); 4209 format %{ "vpbroadcastb $dst,$mem\t! 
replicate8B" %} 4210 ins_encode %{ 4211 int vector_len = 0; 4212 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4213 %} 4214 ins_pipe( pipe_slow ); 4215 %} 4216 4217 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4218 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4219 match(Set dst (ReplicateB src)); 4220 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 4221 ins_encode %{ 4222 int vector_len = 0; 4223 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4224 %} 4225 ins_pipe( pipe_slow ); 4226 %} 4227 4228 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4229 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4230 match(Set dst (ReplicateB (LoadB mem))); 4231 format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} 4232 ins_encode %{ 4233 int vector_len = 0; 4234 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4235 %} 4236 ins_pipe( pipe_slow ); 4237 %} 4238 4239 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4240 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4241 match(Set dst (ReplicateB src)); 4242 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4243 ins_encode %{ 4244 int vector_len = 1; 4245 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4246 %} 4247 ins_pipe( pipe_slow ); 4248 %} 4249 4250 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4251 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4252 match(Set dst (ReplicateB (LoadB mem))); 4253 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4254 ins_encode %{ 4255 int vector_len = 1; 4256 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4257 %} 4258 ins_pipe( pipe_slow ); 4259 %} 4260 4261 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4262 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4263 match(Set dst (ReplicateB src)); 4264 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 4265 ins_encode %{ 4266 int vector_len = 2; 4267 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4268 %} 4269 ins_pipe( pipe_slow ); 4270 %} 4271 4272 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4273 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4274 match(Set dst (ReplicateB (LoadB mem))); 4275 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 4276 ins_encode %{ 4277 int vector_len = 2; 4278 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4279 %} 4280 ins_pipe( pipe_slow ); 4281 %} 4282 4283 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4284 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4285 match(Set dst (ReplicateB con)); 4286 format %{ "movq $dst,[$constantaddress]\n\t" 4287 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4288 ins_encode %{ 4289 int vector_len = 0; 4290 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4291 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4292 %} 4293 ins_pipe( pipe_slow ); 4294 %} 4295 4296 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4297 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4298 match(Set dst (ReplicateB con)); 4299 format %{ "movq $dst,[$constantaddress]\n\t" 4300 "vpbroadcastb $dst,$dst\t! 
replicate32B" %} 4301 ins_encode %{ 4302 int vector_len = 1; 4303 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4304 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4305 %} 4306 ins_pipe( pipe_slow ); 4307 %} 4308 4309 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4310 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4311 match(Set dst (ReplicateB con)); 4312 format %{ "movq $dst,[$constantaddress]\n\t" 4313 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4314 ins_encode %{ 4315 int vector_len = 2; 4316 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4317 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4318 %} 4319 ins_pipe( pipe_slow ); 4320 %} 4321 4322 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4323 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4324 match(Set dst (ReplicateB zero)); 4325 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 4326 ins_encode %{ 4327 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4328 int vector_len = 2; 4329 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4330 %} 4331 ins_pipe( fpu_reg_reg ); 4332 %} 4333 4334 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4335 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4336 match(Set dst (ReplicateS src)); 4337 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 4338 ins_encode %{ 4339 int vector_len = 0; 4340 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4341 %} 4342 ins_pipe( pipe_slow ); 4343 %} 4344 4345 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4346 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4347 match(Set dst (ReplicateS (LoadS mem))); 4348 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4349 ins_encode %{ 4350 int vector_len = 0; 4351 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4352 %} 4353 ins_pipe( pipe_slow ); 4354 %} 4355 4356 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4357 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4358 match(Set dst (ReplicateS src)); 4359 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4360 ins_encode %{ 4361 int vector_len = 0; 4362 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4363 %} 4364 ins_pipe( pipe_slow ); 4365 %} 4366 4367 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4368 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4369 match(Set dst (ReplicateS (LoadS mem))); 4370 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4371 ins_encode %{ 4372 int vector_len = 0; 4373 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4374 %} 4375 ins_pipe( pipe_slow ); 4376 %} 4377 4378 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4379 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4380 match(Set dst (ReplicateS src)); 4381 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4382 ins_encode %{ 4383 int vector_len = 1; 4384 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4385 %} 4386 ins_pipe( pipe_slow ); 4387 %} 4388 4389 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4390 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4391 match(Set dst (ReplicateS (LoadS mem))); 4392 format %{ "vpbroadcastw $dst,$mem\t! 
replicate16S" %} 4393 ins_encode %{ 4394 int vector_len = 1; 4395 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4396 %} 4397 ins_pipe( pipe_slow ); 4398 %} 4399 4400 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4401 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4402 match(Set dst (ReplicateS src)); 4403 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4404 ins_encode %{ 4405 int vector_len = 2; 4406 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4407 %} 4408 ins_pipe( pipe_slow ); 4409 %} 4410 4411 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4412 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4413 match(Set dst (ReplicateS (LoadS mem))); 4414 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4415 ins_encode %{ 4416 int vector_len = 2; 4417 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4418 %} 4419 ins_pipe( pipe_slow ); 4420 %} 4421 4422 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4423 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4424 match(Set dst (ReplicateS con)); 4425 format %{ "movq $dst,[$constantaddress]\n\t" 4426 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4427 ins_encode %{ 4428 int vector_len = 0; 4429 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4430 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4431 %} 4432 ins_pipe( pipe_slow ); 4433 %} 4434 4435 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4436 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4437 match(Set dst (ReplicateS con)); 4438 format %{ "movq $dst,[$constantaddress]\n\t" 4439 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4440 ins_encode %{ 4441 int vector_len = 1; 4442 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4443 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4444 %} 4445 ins_pipe( pipe_slow ); 4446 %} 4447 4448 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4449 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4450 match(Set dst (ReplicateS con)); 4451 format %{ "movq $dst,[$constantaddress]\n\t" 4452 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4453 ins_encode %{ 4454 int vector_len = 2; 4455 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4456 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4457 %} 4458 ins_pipe( pipe_slow ); 4459 %} 4460 4461 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4462 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4463 match(Set dst (ReplicateS zero)); 4464 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4465 ins_encode %{ 4466 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4467 int vector_len = 2; 4468 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4469 %} 4470 ins_pipe( fpu_reg_reg ); 4471 %} 4472 4473 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4474 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4475 match(Set dst (ReplicateI src)); 4476 format %{ "vpbroadcastd $dst,$src\t! 
replicate4I" %} 4477 ins_encode %{ 4478 int vector_len = 0; 4479 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4480 %} 4481 ins_pipe( pipe_slow ); 4482 %} 4483 4484 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4485 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4486 match(Set dst (ReplicateI (LoadI mem))); 4487 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4488 ins_encode %{ 4489 int vector_len = 0; 4490 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4496 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4497 match(Set dst (ReplicateI src)); 4498 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4499 ins_encode %{ 4500 int vector_len = 1; 4501 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4502 %} 4503 ins_pipe( pipe_slow ); 4504 %} 4505 4506 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4507 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4508 match(Set dst (ReplicateI (LoadI mem))); 4509 format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} 4510 ins_encode %{ 4511 int vector_len = 1; 4512 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4513 %} 4514 ins_pipe( pipe_slow ); 4515 %} 4516 4517 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4518 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4519 match(Set dst (ReplicateI src)); 4520 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4521 ins_encode %{ 4522 int vector_len = 2; 4523 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4524 %} 4525 ins_pipe( pipe_slow ); 4526 %} 4527 4528 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4529 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4530 match(Set dst (ReplicateI (LoadI mem))); 4531 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4532 ins_encode %{ 4533 int vector_len = 2; 4534 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4535 %} 4536 ins_pipe( pipe_slow ); 4537 %} 4538 4539 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4540 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4541 match(Set dst (ReplicateI con)); 4542 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4543 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4544 ins_encode %{ 4545 int vector_len = 0; 4546 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4547 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4548 %} 4549 ins_pipe( pipe_slow ); 4550 %} 4551 4552 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4553 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4554 match(Set dst (ReplicateI con)); 4555 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4556 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4557 ins_encode %{ 4558 int vector_len = 1; 4559 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4560 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4561 %} 4562 ins_pipe( pipe_slow ); 4563 %} 4564 4565 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4566 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4567 match(Set dst (ReplicateI con)); 4568 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4569 "vpbroadcastd $dst,$dst\t! 
replicate16I" %} 4570 ins_encode %{ 4571 int vector_len = 2; 4572 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4573 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4574 %} 4575 ins_pipe( pipe_slow ); 4576 %} 4577 4578 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4579 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4580 match(Set dst (ReplicateI zero)); 4581 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4582 ins_encode %{ 4583 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4584 int vector_len = 2; 4585 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4586 %} 4587 ins_pipe( fpu_reg_reg ); 4588 %} 4589 4590 // Replicate long (8 byte) scalar to be vector 4591 #ifdef _LP64 4592 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4593 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4594 match(Set dst (ReplicateL src)); 4595 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4596 ins_encode %{ 4597 int vector_len = 1; 4598 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4599 %} 4600 ins_pipe( pipe_slow ); 4601 %} 4602 4603 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4604 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4605 match(Set dst (ReplicateL src)); 4606 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4607 ins_encode %{ 4608 int vector_len = 2; 4609 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4610 %} 4611 ins_pipe( pipe_slow ); 4612 %} 4613 #else // _LP64 4614 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4615 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4616 match(Set dst (ReplicateL src)); 4617 effect(TEMP dst, USE src, TEMP tmp); 4618 format %{ "movdl $dst,$src.lo\n\t" 4619 "movdl $tmp,$src.hi\n\t" 4620 "punpckldq $dst,$tmp\n\t" 4621 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4622 ins_encode %{ 4623 int vector_len = 1; 4624 __ movdl($dst$$XMMRegister, $src$$Register); 4625 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4626 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4627 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4628 %} 4629 ins_pipe( pipe_slow ); 4630 %} 4631 4632 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4633 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4634 match(Set dst (ReplicateL src)); 4635 effect(TEMP dst, USE src, TEMP tmp); 4636 format %{ "movdl $dst,$src.lo\n\t" 4637 "movdl $tmp,$src.hi\n\t" 4638 "punpckldq $dst,$tmp\n\t" 4639 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4640 ins_encode %{ 4641 int vector_len = 2; 4642 __ movdl($dst$$XMMRegister, $src$$Register); 4643 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4644 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4645 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4646 %} 4647 ins_pipe( pipe_slow ); 4648 %} 4649 #endif // _LP64 4650 4651 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4652 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4653 match(Set dst (ReplicateL con)); 4654 format %{ "movq $dst,[$constantaddress]\n\t" 4655 "vpbroadcastq $dst,$dst\t! 
replicate4L" %} 4656 ins_encode %{ 4657 int vector_len = 1; 4658 __ movq($dst$$XMMRegister, $constantaddress($con)); 4659 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4660 %} 4661 ins_pipe( pipe_slow ); 4662 %} 4663 4664 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4665 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4666 match(Set dst (ReplicateL con)); 4667 format %{ "movq $dst,[$constantaddress]\n\t" 4668 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4669 ins_encode %{ 4670 int vector_len = 2; 4671 __ movq($dst$$XMMRegister, $constantaddress($con)); 4672 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4673 %} 4674 ins_pipe( pipe_slow ); 4675 %} 4676 4677 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4678 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4679 match(Set dst (ReplicateL (LoadL mem))); 4680 format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} 4681 ins_encode %{ 4682 int vector_len = 0; 4683 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4684 %} 4685 ins_pipe( pipe_slow ); 4686 %} 4687 4688 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4689 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4690 match(Set dst (ReplicateL (LoadL mem))); 4691 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4692 ins_encode %{ 4693 int vector_len = 1; 4694 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4695 %} 4696 ins_pipe( pipe_slow ); 4697 %} 4698 4699 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4700 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4701 match(Set dst (ReplicateL (LoadL mem))); 4702 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4703 ins_encode %{ 4704 int vector_len = 2; 4705 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4706 %} 4707 ins_pipe( pipe_slow ); 4708 %} 4709 4710 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4711 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4712 match(Set dst (ReplicateL zero)); 4713 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4714 ins_encode %{ 4715 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4716 int vector_len = 2; 4717 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4718 %} 4719 ins_pipe( fpu_reg_reg ); 4720 %} 4721 4722 instruct Repl8F_evex(vecY dst, regF src) %{ 4723 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4724 match(Set dst (ReplicateF src)); 4725 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4726 ins_encode %{ 4727 int vector_len = 1; 4728 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4729 %} 4730 ins_pipe( pipe_slow ); 4731 %} 4732 4733 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4734 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4735 match(Set dst (ReplicateF (LoadF mem))); 4736 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4737 ins_encode %{ 4738 int vector_len = 1; 4739 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4740 %} 4741 ins_pipe( pipe_slow ); 4742 %} 4743 4744 instruct Repl16F_evex(vecZ dst, regF src) %{ 4745 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4746 match(Set dst (ReplicateF src)); 4747 format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %} 4748 ins_encode %{ 4749 int vector_len = 2; 4750 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4751 %} 4752 ins_pipe( pipe_slow ); 4753 %} 4754 4755 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4756 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4757 match(Set dst (ReplicateF (LoadF mem))); 4758 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4759 ins_encode %{ 4760 int vector_len = 2; 4761 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4762 %} 4763 ins_pipe( pipe_slow ); 4764 %} 4765 4766 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4767 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4768 match(Set dst (ReplicateF zero)); 4769 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 4770 ins_encode %{ 4771 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4772 int vector_len = 2; 4773 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4774 %} 4775 ins_pipe( fpu_reg_reg ); 4776 %} 4777 4778 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4779 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4780 match(Set dst (ReplicateF zero)); 4781 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4782 ins_encode %{ 4783 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4784 int vector_len = 2; 4785 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4786 %} 4787 ins_pipe( fpu_reg_reg ); 4788 %} 4789 4790 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4791 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4792 match(Set dst (ReplicateF zero)); 4793 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4794 ins_encode %{ 4795 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4796 int vector_len = 2; 4797 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4798 %} 4799 ins_pipe( fpu_reg_reg ); 4800 %} 4801 4802 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4803 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4804 match(Set dst (ReplicateF zero)); 4805 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4806 ins_encode %{ 4807 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4808 int vector_len = 2; 4809 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4810 %} 4811 ins_pipe( fpu_reg_reg ); 4812 %} 4813 4814 instruct Repl4D_evex(vecY dst, regD src) %{ 4815 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4816 match(Set dst (ReplicateD src)); 4817 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4818 ins_encode %{ 4819 int vector_len = 1; 4820 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4821 %} 4822 ins_pipe( pipe_slow ); 4823 %} 4824 4825 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4826 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4827 match(Set dst (ReplicateD (LoadD mem))); 4828 format %{ "vbroadcastsd $dst,$mem\t! 
replicate4D" %} 4829 ins_encode %{ 4830 int vector_len = 1; 4831 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4832 %} 4833 ins_pipe( pipe_slow ); 4834 %} 4835 4836 instruct Repl8D_evex(vecZ dst, regD src) %{ 4837 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4838 match(Set dst (ReplicateD src)); 4839 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4840 ins_encode %{ 4841 int vector_len = 2; 4842 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4843 %} 4844 ins_pipe( pipe_slow ); 4845 %} 4846 4847 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4848 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4849 match(Set dst (ReplicateD (LoadD mem))); 4850 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 4851 ins_encode %{ 4852 int vector_len = 2; 4853 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4854 %} 4855 ins_pipe( pipe_slow ); 4856 %} 4857 4858 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4859 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4860 match(Set dst (ReplicateD zero)); 4861 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4862 ins_encode %{ 4863 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4864 int vector_len = 2; 4865 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4866 %} 4867 ins_pipe( fpu_reg_reg ); 4868 %} 4869 4870 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4871 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4872 match(Set dst (ReplicateD zero)); 4873 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4874 ins_encode %{ 4875 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4876 int vector_len = 2; 4877 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4878 %} 4879 ins_pipe( fpu_reg_reg ); 4880 %} 4881 4882 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4883 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4884 match(Set dst (ReplicateD zero)); 4885 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4886 ins_encode %{ 4887 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4888 int vector_len = 2; 4889 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4890 %} 4891 ins_pipe( fpu_reg_reg ); 4892 %} 4893 4894 // ====================REDUCTION ARITHMETIC======================================= 4895 4896 instruct rsadd8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 4897 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 4898 match(Set dst (AddReductionVI src1 src2)); 4899 effect(TEMP tmp, TEMP tmp2, TEMP dst); 4900 format %{ 4901 "pshufd $tmp,$src2,0x1\n\t" 4902 "paddb $tmp,$src2\n\t" 4903 "movzbl $dst,$src1\n\t" 4904 "pextrb $tmp2,$tmp, 0x0\n\t" 4905 "addb $dst,$tmp2\n\t" 4906 "pextrb $tmp2,$tmp, 0x1\n\t" 4907 "addb $dst,$tmp2\n\t" 4908 "pextrb $tmp2,$tmp, 0x2\n\t" 4909 "addb $dst,$tmp2\n\t" 4910 "pextrb $tmp2,$tmp, 0x3\n\t" 4911 "addb $dst,$tmp2\n\t" 4912 "movsbl $dst,$dst\t! 
add reduction8B" %} 4913 ins_encode %{ 4914 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 4915 __ paddb($tmp$$XMMRegister, $src2$$XMMRegister); 4916 __ movzbl($dst$$Register, $src1$$Register); 4917 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); 4918 __ addb($dst$$Register, $tmp2$$Register); 4919 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); 4920 __ addb($dst$$Register, $tmp2$$Register); 4921 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); 4922 __ addb($dst$$Register, $tmp2$$Register); 4923 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); 4924 __ addb($dst$$Register, $tmp2$$Register); 4925 __ movsbl($dst$$Register, $dst$$Register); 4926 %} 4927 ins_pipe( pipe_slow ); 4928 %} 4929 4930 instruct rsadd16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ 4931 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 4932 match(Set dst (AddReductionVI src1 src2)); 4933 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 4934 format %{ "pshufd $tmp,$src2,0xE\n\t" 4935 "paddb $tmp,$src2\n\t" 4936 "pshufd $tmp2,$tmp,0x1\n\t" 4937 "paddb $tmp,$tmp,$tmp2\n\t" 4938 "movzbl $dst,$src1\n\t" 4939 "pextrb $tmp3,$tmp, 0x0\n\t" 4940 "addb $dst,$tmp3\n\t" 4941 "pextrb $tmp3,$tmp, 0x1\n\t" 4942 "addb $dst,$tmp3\n\t" 4943 "pextrb $tmp3,$tmp, 0x2\n\t" 4944 "addb $dst,$tmp3\n\t" 4945 "pextrb $tmp3,$tmp, 0x3\n\t" 4946 "addb $dst,$tmp3\n\t" 4947 "movsbl $dst,$dst\t! add reduction16B" %} 4948 ins_encode %{ 4949 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 4950 __ paddb($tmp$$XMMRegister, $src2$$XMMRegister); 4951 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4952 __ paddb($tmp$$XMMRegister, $tmp2$$XMMRegister); 4953 __ movzbl($dst$$Register, $src1$$Register); 4954 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 4955 __ addb($dst$$Register, $tmp3$$Register); 4956 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 4957 __ addb($dst$$Register, $tmp3$$Register); 4958 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 4959 __ addb($dst$$Register, $tmp3$$Register); 4960 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 4961 __ addb($dst$$Register, $tmp3$$Register); 4962 __ movsbl($dst$$Register, $dst$$Register); 4963 %} 4964 ins_pipe( pipe_slow ); 4965 %} 4966 4967 instruct rvadd32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 4968 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 4969 match(Set dst (AddReductionVI src1 src2)); 4970 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 4971 format %{ "vextracti128_high $tmp,$src2\n\t" 4972 "vpaddb $tmp,$tmp,$src2\n\t" 4973 "pshufd $tmp2,$tmp,0xE\n\t" 4974 "vpaddb $tmp,$tmp,$tmp2\n\t" 4975 "pshufd $tmp2,$tmp,0x1\n\t" 4976 "vpaddb $tmp,$tmp,$tmp2\n\t" 4977 "movzbl $dst,$src1\n\t" 4978 "pextrb $tmp3,$tmp, 0x0\n\t" 4979 "addb $dst,$tmp3\n\t" 4980 "pextrb $tmp3,$tmp, 0x1\n\t" 4981 "addb $dst,$tmp3\n\t" 4982 "pextrb $tmp3,$tmp, 0x2\n\t" 4983 "addb $dst,$tmp3\n\t" 4984 "pextrb $tmp3,$tmp, 0x3\n\t" 4985 "addb $dst,$tmp3\n\t" 4986 "movsbl $dst,$dst\t! 
add reduction32B" %} 4987 ins_encode %{ 4988 int vector_len = 0; 4989 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4990 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4991 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4992 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4993 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4994 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4995 __ movzbl($dst$$Register, $src1$$Register); 4996 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 4997 __ addb($dst$$Register, $tmp3$$Register); 4998 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 4999 __ addb($dst$$Register, $tmp3$$Register); 5000 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 5001 __ addb($dst$$Register, $tmp3$$Register); 5002 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 5003 __ addb($dst$$Register, $tmp3$$Register); 5004 __ movsbl($dst$$Register, $dst$$Register); 5005 %} 5006 ins_pipe( pipe_slow ); 5007 %} 5008 5009 instruct rvadd64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 5010 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 5011 match(Set dst (AddReductionVI src1 src2)); 5012 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 5013 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5014 "vpaddb $tmp2,$tmp2,$src2\n\t" 5015 "vextracti128_high $tmp,$tmp2\n\t" 5016 "vpaddb $tmp,$tmp,$tmp2\n\t" 5017 "pshufd $tmp2,$tmp,0xE\n\t" 5018 "vpaddb $tmp,$tmp,$tmp2\n\t" 5019 "pshufd $tmp2,$tmp,0x1\n\t" 5020 "vpaddb $tmp,$tmp,$tmp2\n\t" 5021 "movzbl $dst,$src1\n\t" 5022 "movdl $tmp3,$tmp\n\t" 5023 "addb $dst,$tmp3\n\t" 5024 "shrl $tmp3,0x8\n\t" 5025 "addb $dst,$tmp3\n\t" 5026 "shrl $tmp3,0x8\n\t" 5027 "addb $dst,$tmp3\n\t" 5028 "shrl $tmp3,0x8\n\t" 5029 "addb $dst,$tmp3\n\t" 5030 "movsbl $dst,$dst\t! 
add reduction64B" %} 5031 ins_encode %{ 5032 int vector_len = 0; 5033 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5034 __ vpaddb($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5035 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5036 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5037 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5038 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5039 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5040 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5041 __ movzbl($dst$$Register, $src1$$Register); 5042 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 5043 __ addb($dst$$Register, $tmp3$$Register); 5044 __ shrl($tmp3$$Register, 8); 5045 __ addb($dst$$Register, $tmp3$$Register); 5046 __ shrl($tmp3$$Register, 8); 5047 __ addb($dst$$Register, $tmp3$$Register); 5048 __ shrl($tmp3$$Register, 8); 5049 __ addb($dst$$Register, $tmp3$$Register); 5050 __ movsbl($dst$$Register, $dst$$Register); 5051 %} 5052 ins_pipe( pipe_slow ); 5053 %} 5054 5055 instruct rsadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 5056 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5057 match(Set dst (AddReductionVI src1 src2)); 5058 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5059 format %{ 5060 "movdqu $tmp,$src2\n\t" 5061 "phaddw $tmp,$tmp\n\t" 5062 "phaddw $tmp,$tmp\n\t" 5063 "movzwl $dst,$src1\n\t" 5064 "pextrw $tmp2,$tmp, 0x0\n\t" 5065 "addw $dst,$tmp2\n\t" 5066 "movswl $dst,$dst\t! add reduction4S" %} 5067 ins_encode %{ 5068 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 5069 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5070 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5071 __ movzwl($dst$$Register, $src1$$Register); 5072 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5073 __ addw($dst$$Register, $tmp2$$Register); 5074 __ movswl($dst$$Register, $dst$$Register); 5075 %} 5076 ins_pipe( pipe_slow ); 5077 %} 5078 5079 instruct rvadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 5080 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5081 match(Set dst (AddReductionVI src1 src2)); 5082 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5083 format %{ "vphaddw $tmp,$src2,$src2\n\t" 5084 "vphaddw $tmp,$tmp,$tmp\n\t" 5085 "movzwl $dst,$src1\n\t" 5086 "pextrw $tmp2,$tmp, 0x0\n\t" 5087 "addw $dst,$tmp2\n\t" 5088 "movswl $dst,$dst\t! 
add reduction4S" %} 5089 ins_encode %{ 5090 int vector_len = 0; 5091 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5092 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5093 __ movzwl($dst$$Register, $src1$$Register); 5094 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5095 __ addw($dst$$Register, $tmp2$$Register); 5096 __ movswl($dst$$Register, $dst$$Register); 5097 %} 5098 ins_pipe( pipe_slow ); 5099 %} 5100 5101 instruct rsadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ 5102 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5103 match(Set dst (AddReductionVI src1 src2)); 5104 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5105 format %{ 5106 "movdqu $tmp,$src2\n\t" 5107 "phaddw $tmp,$tmp\n\t" 5108 "phaddw $tmp,$tmp\n\t" 5109 "phaddw $tmp,$tmp\n\t" 5110 "movzwl $dst,$src1\n\t" 5111 "pextrw $tmp2,$tmp, 0x0\n\t" 5112 "addw $dst,$tmp2\n\t" 5113 "movswl $dst,$dst\t! add reduction8S" %} 5114 ins_encode %{ 5115 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5116 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5117 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5118 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5119 __ movzwl($dst$$Register, $src1$$Register); 5120 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5121 __ addw($dst$$Register, $tmp2$$Register); 5122 __ movswl($dst$$Register, $dst$$Register); 5123 %} 5124 ins_pipe( pipe_slow ); 5125 %} 5126 5127 instruct rvadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ 5128 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5129 match(Set dst (AddReductionVI src1 src2)); 5130 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5131 format %{ "vphaddw $tmp,$src2,$src2\n\t" 5132 "vphaddw $tmp,$tmp,$tmp\n\t" 5133 "vphaddw $tmp,$tmp,$tmp\n\t" 5134 "movzwl $dst,$src1\n\t" 5135 "pextrw $tmp2,$tmp, 0x0\n\t" 5136 "addw $dst,$tmp2\n\t" 5137 "movswl $dst,$dst\t! add reduction8S" %} 5138 ins_encode %{ 5139 int vector_len = 0; 5140 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5141 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5142 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5143 __ movzwl($dst$$Register, $src1$$Register); 5144 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5145 __ addw($dst$$Register, $tmp2$$Register); 5146 __ movswl($dst$$Register, $dst$$Register); 5147 %} 5148 ins_pipe( pipe_slow ); 5149 %} 5150 5151 instruct rvadd16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2) %{ 5152 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5153 match(Set dst (AddReductionVI src1 src2)); 5154 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5155 format %{ "vphaddw $tmp,$src2,$src2\n\t" 5156 "vphaddw $tmp,$tmp,$tmp\n\t" 5157 "vphaddw $tmp,$tmp,$tmp\n\t" 5158 "vphaddw $tmp,$tmp,$tmp\n\t" 5159 "movzwl $dst,$src1\n\t" 5160 "pextrw $tmp2,$tmp, 0x0\n\t" 5161 "addw $dst,$tmp2\n\t" 5162 "movswl $dst,$dst\t! 
add reduction16S" %} 5163 ins_encode %{ 5164 int vector_len = 1; 5165 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5166 __ vpermq($tmp$$XMMRegister, $tmp$$XMMRegister, 0xD8, vector_len); 5167 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5168 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5169 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5170 __ movzwl($dst$$Register, $src1$$Register); 5171 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5172 __ addw($dst$$Register, $tmp2$$Register); 5173 __ movswl($dst$$Register, $dst$$Register); 5174 %} 5175 ins_pipe( pipe_slow ); 5176 %} 5177 5178 instruct rvadd32S_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 5179 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5180 match(Set dst (AddReductionVI src1 src2)); 5181 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 5182 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5183 "vpaddw $tmp2,$tmp2,$src2\n\t" 5184 "vextracti128_high $tmp,$tmp2\n\t" 5185 "vpaddw $tmp,$tmp,$tmp2\n\t" 5186 "pshufd $tmp2,$tmp,0xE\n\t" 5187 "vpaddw $tmp,$tmp,$tmp2\n\t" 5188 "pshufd $tmp2,$tmp,0x1\n\t" 5189 "vpaddw $tmp,$tmp,$tmp2\n\t" 5190 "movdl $tmp3,$tmp\n\t" 5191 "addw $dst,$tmp3\n\t" 5192 "shrl $tmp3,0x16\n\t" 5193 "addw $dst,$tmp3\n\t" 5194 "movswl $dst,$dst\t! add reduction32S" %} 5195 ins_encode %{ 5196 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5197 __ vpaddw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5198 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5199 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5200 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5201 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5202 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5203 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5204 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 5205 __ addw($dst$$Register, $tmp3$$Register); 5206 __ shrl($tmp3$$Register, 16); 5207 __ addw($dst$$Register, $tmp3$$Register); 5208 __ movswl($dst$$Register, $dst$$Register); 5209 %} 5210 ins_pipe( pipe_slow ); 5211 %} 5212 5213 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5214 predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5215 match(Set dst (AddReductionVI src1 src2)); 5216 effect(TEMP tmp2, TEMP tmp); 5217 format %{ "movdqu $tmp2,$src2\n\t" 5218 "phaddd $tmp2,$tmp2\n\t" 5219 "movd $tmp,$src1\n\t" 5220 "paddd $tmp,$tmp2\n\t" 5221 "movd $dst,$tmp\t! 
add reduction2I" %} 5222 ins_encode %{ 5223 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 5224 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 5225 __ movdl($tmp$$XMMRegister, $src1$$Register); 5226 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 5227 __ movdl($dst$$Register, $tmp$$XMMRegister); 5228 %} 5229 ins_pipe( pipe_slow ); 5230 %} 5231 5232 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5233 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5234 match(Set dst (AddReductionVI src1 src2)); 5235 effect(TEMP tmp, TEMP tmp2); 5236 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5237 "movd $tmp2,$src1\n\t" 5238 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5239 "movd $dst,$tmp2\t! add reduction2I" %} 5240 ins_encode %{ 5241 int vector_len = 0; 5242 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5243 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5244 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 5245 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5246 %} 5247 ins_pipe( pipe_slow ); 5248 %} 5249 5250 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5251 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5252 match(Set dst (AddReductionVI src1 src2)); 5253 effect(TEMP tmp, TEMP tmp2); 5254 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5255 "vpaddd $tmp,$src2,$tmp2\n\t" 5256 "movd $tmp2,$src1\n\t" 5257 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5258 "movd $dst,$tmp2\t! add reduction2I" %} 5259 ins_encode %{ 5260 int vector_len = 0; 5261 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5262 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5263 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5264 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5265 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5266 %} 5267 ins_pipe( pipe_slow ); 5268 %} 5269 5270 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5271 predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5272 match(Set dst (AddReductionVI src1 src2)); 5273 effect(TEMP tmp, TEMP tmp2); 5274 format %{ "movdqu $tmp,$src2\n\t" 5275 "phaddd $tmp,$tmp\n\t" 5276 "phaddd $tmp,$tmp\n\t" 5277 "movd $tmp2,$src1\n\t" 5278 "paddd $tmp2,$tmp\n\t" 5279 "movd $dst,$tmp2\t! add reduction4I" %} 5280 ins_encode %{ 5281 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5282 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5283 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5284 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5285 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 5286 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5287 %} 5288 ins_pipe( pipe_slow ); 5289 %} 5290 5291 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5292 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5293 match(Set dst (AddReductionVI src1 src2)); 5294 effect(TEMP tmp, TEMP tmp2); 5295 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5296 "vphaddd $tmp,$tmp,$tmp\n\t" 5297 "movd $tmp2,$src1\n\t" 5298 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5299 "movd $dst,$tmp2\t! 
add reduction4I" %} 5300 ins_encode %{ 5301 int vector_len = 0; 5302 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5303 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5304 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5305 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 5306 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5307 %} 5308 ins_pipe( pipe_slow ); 5309 %} 5310 5311 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5312 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5313 match(Set dst (AddReductionVI src1 src2)); 5314 effect(TEMP tmp, TEMP tmp2); 5315 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5316 "vpaddd $tmp,$src2,$tmp2\n\t" 5317 "pshufd $tmp2,$tmp,0x1\n\t" 5318 "vpaddd $tmp,$tmp,$tmp2\n\t" 5319 "movd $tmp2,$src1\n\t" 5320 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5321 "movd $dst,$tmp2\t! add reduction4I" %} 5322 ins_encode %{ 5323 int vector_len = 0; 5324 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5325 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5326 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5327 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5328 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5329 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5330 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5331 %} 5332 ins_pipe( pipe_slow ); 5333 %} 5334 5335 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5336 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5337 match(Set dst (AddReductionVI src1 src2)); 5338 effect(TEMP tmp, TEMP tmp2); 5339 format %{ "vphaddd $tmp,$src2,$src2\n\t" 5340 "vphaddd $tmp,$tmp,$tmp2\n\t" 5341 "vextracti128_high $tmp2,$tmp\n\t" 5342 "vpaddd $tmp,$tmp,$tmp2\n\t" 5343 "movd $tmp2,$src1\n\t" 5344 "vpaddd $tmp2,$tmp2,$tmp\n\t" 5345 "movd $dst,$tmp2\t! add reduction8I" %} 5346 ins_encode %{ 5347 int vector_len = 1; 5348 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5349 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5350 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 5351 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5352 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5353 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5354 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5355 %} 5356 ins_pipe( pipe_slow ); 5357 %} 5358 5359 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5360 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5361 match(Set dst (AddReductionVI src1 src2)); 5362 effect(TEMP tmp, TEMP tmp2); 5363 format %{ "vextracti128_high $tmp,$src2\n\t" 5364 "vpaddd $tmp,$tmp,$src2\n\t" 5365 "pshufd $tmp2,$tmp,0xE\n\t" 5366 "vpaddd $tmp,$tmp,$tmp2\n\t" 5367 "pshufd $tmp2,$tmp,0x1\n\t" 5368 "vpaddd $tmp,$tmp,$tmp2\n\t" 5369 "movd $tmp2,$src1\n\t" 5370 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5371 "movd $dst,$tmp2\t! 
add reduction8I" %} 5372 ins_encode %{ 5373 int vector_len = 0; 5374 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5375 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5376 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5377 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5378 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5379 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5380 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5381 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5382 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5383 %} 5384 ins_pipe( pipe_slow ); 5385 %} 5386 5387 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5388 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 5389 match(Set dst (AddReductionVI src1 src2)); 5390 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5391 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5392 "vpaddd $tmp3,$tmp3,$src2\n\t" 5393 "vextracti128_high $tmp,$tmp3\n\t" 5394 "vpaddd $tmp,$tmp,$tmp3\n\t" 5395 "pshufd $tmp2,$tmp,0xE\n\t" 5396 "vpaddd $tmp,$tmp,$tmp2\n\t" 5397 "pshufd $tmp2,$tmp,0x1\n\t" 5398 "vpaddd $tmp,$tmp,$tmp2\n\t" 5399 "movd $tmp2,$src1\n\t" 5400 "vpaddd $tmp2,$tmp,$tmp2\n\t" 5401 "movd $dst,$tmp2\t! mul reduction16I" %} 5402 ins_encode %{ 5403 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5404 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5405 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5406 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5407 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5408 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5409 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5410 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5411 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5412 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5413 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5414 %} 5415 ins_pipe( pipe_slow ); 5416 %} 5417 5418 #ifdef _LP64 5419 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5420 predicate(UseAVX > 2); 5421 match(Set dst (AddReductionVL src1 src2)); 5422 effect(TEMP tmp, TEMP tmp2); 5423 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5424 "vpaddq $tmp,$src2,$tmp2\n\t" 5425 "movdq $tmp2,$src1\n\t" 5426 "vpaddq $tmp2,$tmp,$tmp2\n\t" 5427 "movdq $dst,$tmp2\t! add reduction2L" %} 5428 ins_encode %{ 5429 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5430 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5431 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5432 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5433 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5434 %} 5435 ins_pipe( pipe_slow ); 5436 %} 5437 5438 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5439 predicate(UseAVX > 2); 5440 match(Set dst (AddReductionVL src1 src2)); 5441 effect(TEMP tmp, TEMP tmp2); 5442 format %{ "vextracti128_high $tmp,$src2\n\t" 5443 "vpaddq $tmp2,$tmp,$src2\n\t" 5444 "pshufd $tmp,$tmp2,0xE\n\t" 5445 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5446 "movdq $tmp,$src1\n\t" 5447 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5448 "movdq $dst,$tmp2\t! 
add reduction4L" %} 5449 ins_encode %{ 5450 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5451 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5452 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5453 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5454 __ movdq($tmp$$XMMRegister, $src1$$Register); 5455 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5456 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5457 %} 5458 ins_pipe( pipe_slow ); 5459 %} 5460 5461 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5462 predicate(UseAVX > 2); 5463 match(Set dst (AddReductionVL src1 src2)); 5464 effect(TEMP tmp, TEMP tmp2); 5465 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5466 "vpaddq $tmp2,$tmp2,$src2\n\t" 5467 "vextracti128_high $tmp,$tmp2\n\t" 5468 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5469 "pshufd $tmp,$tmp2,0xE\n\t" 5470 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5471 "movdq $tmp,$src1\n\t" 5472 "vpaddq $tmp2,$tmp2,$tmp\n\t" 5473 "movdq $dst,$tmp2\t! add reduction8L" %} 5474 ins_encode %{ 5475 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5476 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5477 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5478 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5479 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5480 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5481 __ movdq($tmp$$XMMRegister, $src1$$Register); 5482 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5483 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5484 %} 5485 ins_pipe( pipe_slow ); 5486 %} 5487 #endif 5488 5489 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5490 predicate(UseSSE >= 1 && UseAVX == 0); 5491 match(Set dst (AddReductionVF dst src2)); 5492 effect(TEMP dst, TEMP tmp); 5493 format %{ "addss $dst,$src2\n\t" 5494 "pshufd $tmp,$src2,0x01\n\t" 5495 "addss $dst,$tmp\t! add reduction2F" %} 5496 ins_encode %{ 5497 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5498 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5499 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5500 %} 5501 ins_pipe( pipe_slow ); 5502 %} 5503 5504 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5505 predicate(UseAVX > 0); 5506 match(Set dst (AddReductionVF dst src2)); 5507 effect(TEMP dst, TEMP tmp); 5508 format %{ "vaddss $dst,$dst,$src2\n\t" 5509 "pshufd $tmp,$src2,0x01\n\t" 5510 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 5511 ins_encode %{ 5512 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5513 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5514 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5515 %} 5516 ins_pipe( pipe_slow ); 5517 %} 5518 5519 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5520 predicate(UseSSE >= 1 && UseAVX == 0); 5521 match(Set dst (AddReductionVF dst src2)); 5522 effect(TEMP dst, TEMP tmp); 5523 format %{ "addss $dst,$src2\n\t" 5524 "pshufd $tmp,$src2,0x01\n\t" 5525 "addss $dst,$tmp\n\t" 5526 "pshufd $tmp,$src2,0x02\n\t" 5527 "addss $dst,$tmp\n\t" 5528 "pshufd $tmp,$src2,0x03\n\t" 5529 "addss $dst,$tmp\t! 
add reduction4F" %} 5530 ins_encode %{ 5531 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 5532 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5533 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5534 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5535 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5536 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5537 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 5538 %} 5539 ins_pipe( pipe_slow ); 5540 %} 5541 5542 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5543 predicate(UseAVX > 0); 5544 match(Set dst (AddReductionVF dst src2)); 5545 effect(TEMP tmp, TEMP dst); 5546 format %{ "vaddss $dst,dst,$src2\n\t" 5547 "pshufd $tmp,$src2,0x01\n\t" 5548 "vaddss $dst,$dst,$tmp\n\t" 5549 "pshufd $tmp,$src2,0x02\n\t" 5550 "vaddss $dst,$dst,$tmp\n\t" 5551 "pshufd $tmp,$src2,0x03\n\t" 5552 "vaddss $dst,$dst,$tmp\t! add reduction4F" %} 5553 ins_encode %{ 5554 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5555 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5556 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5557 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5558 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5559 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5560 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5561 %} 5562 ins_pipe( pipe_slow ); 5563 %} 5564 5565 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5566 predicate(UseAVX > 0); 5567 match(Set dst (AddReductionVF dst src2)); 5568 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5569 format %{ "vaddss $dst,$dst,$src2\n\t" 5570 "pshufd $tmp,$src2,0x01\n\t" 5571 "vaddss $dst,$dst,$tmp\n\t" 5572 "pshufd $tmp,$src2,0x02\n\t" 5573 "vaddss $dst,$dst,$tmp\n\t" 5574 "pshufd $tmp,$src2,0x03\n\t" 5575 "vaddss $dst,$dst,$tmp\n\t" 5576 "vextractf128_high $tmp2,$src2\n\t" 5577 "vaddss $dst,$dst,$tmp2\n\t" 5578 "pshufd $tmp,$tmp2,0x01\n\t" 5579 "vaddss $dst,$dst,$tmp\n\t" 5580 "pshufd $tmp,$tmp2,0x02\n\t" 5581 "vaddss $dst,$dst,$tmp\n\t" 5582 "pshufd $tmp,$tmp2,0x03\n\t" 5583 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 5584 ins_encode %{ 5585 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5586 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5587 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5588 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5589 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5590 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5591 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5592 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5593 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5594 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5595 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5596 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5597 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5598 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5599 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5600 %} 5601 ins_pipe( pipe_slow ); 5602 %} 5603 5604 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5605 predicate(UseAVX > 2); 5606 match(Set dst (AddReductionVF dst src2)); 5607 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5608 format %{ "vaddss $dst,$dst,$src2\n\t" 5609 "pshufd $tmp,$src2,0x01\n\t" 5610 "vaddss $dst,$dst,$tmp\n\t" 5611 "pshufd $tmp,$src2,0x02\n\t" 5612 "vaddss $dst,$dst,$tmp\n\t" 5613 "pshufd $tmp,$src2,0x03\n\t" 5614 "vaddss $dst,$dst,$tmp\n\t" 5615 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5616 "vaddss $dst,$dst,$tmp2\n\t" 5617 "pshufd $tmp,$tmp2,0x01\n\t" 5618 "vaddss $dst,$dst,$tmp\n\t" 5619 "pshufd $tmp,$tmp2,0x02\n\t" 5620 "vaddss $dst,$dst,$tmp\n\t" 5621 "pshufd $tmp,$tmp2,0x03\n\t" 5622 "vaddss $dst,$dst,$tmp\n\t" 5623 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5624 "vaddss $dst,$dst,$tmp2\n\t" 5625 "pshufd $tmp,$tmp2,0x01\n\t" 5626 "vaddss $dst,$dst,$tmp\n\t" 5627 "pshufd $tmp,$tmp2,0x02\n\t" 5628 "vaddss $dst,$dst,$tmp\n\t" 5629 "pshufd $tmp,$tmp2,0x03\n\t" 5630 "vaddss $dst,$dst,$tmp\n\t" 5631 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5632 "vaddss $dst,$dst,$tmp2\n\t" 5633 "pshufd $tmp,$tmp2,0x01\n\t" 5634 "vaddss $dst,$dst,$tmp\n\t" 5635 "pshufd $tmp,$tmp2,0x02\n\t" 5636 "vaddss $dst,$dst,$tmp\n\t" 5637 "pshufd $tmp,$tmp2,0x03\n\t" 5638 "vaddss $dst,$dst,$tmp\t! 
add reduction16F" %} 5639 ins_encode %{ 5640 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5641 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5642 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5643 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5644 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5645 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5646 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5647 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5648 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5649 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5650 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5651 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5652 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5653 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5654 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5655 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5656 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5657 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5658 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5659 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5660 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5661 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5662 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5663 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5664 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5665 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5666 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5667 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5668 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5669 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5670 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5671 %} 5672 ins_pipe( pipe_slow ); 5673 %} 5674 5675 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5676 predicate(UseSSE >= 1 && UseAVX == 0); 5677 match(Set dst (AddReductionVD dst src2)); 5678 effect(TEMP tmp, TEMP dst); 5679 format %{ "addsd $dst,$src2\n\t" 5680 "pshufd $tmp,$src2,0xE\n\t" 5681 "addsd $dst,$tmp\t! add reduction2D" %} 5682 ins_encode %{ 5683 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 5684 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5685 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 5686 %} 5687 ins_pipe( pipe_slow ); 5688 %} 5689 5690 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 5691 predicate(UseAVX > 0); 5692 match(Set dst (AddReductionVD dst src2)); 5693 effect(TEMP tmp, TEMP dst); 5694 format %{ "vaddsd $dst,$dst,$src2\n\t" 5695 "pshufd $tmp,$src2,0xE\n\t" 5696 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 5697 ins_encode %{ 5698 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5699 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5700 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5701 %} 5702 ins_pipe( pipe_slow ); 5703 %} 5704 5705 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5706 predicate(UseAVX > 0); 5707 match(Set dst (AddReductionVD dst src2)); 5708 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5709 format %{ "vaddsd $dst,$dst,$src2\n\t" 5710 "pshufd $tmp,$src2,0xE\n\t" 5711 "vaddsd $dst,$dst,$tmp\n\t" 5712 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5713 "vaddsd $dst,$dst,$tmp2\n\t" 5714 "pshufd $tmp,$tmp2,0xE\n\t" 5715 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5716 ins_encode %{ 5717 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5718 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5719 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5720 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5721 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5722 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5723 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5724 %} 5725 ins_pipe( pipe_slow ); 5726 %} 5727 5728 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5729 predicate(UseAVX > 2); 5730 match(Set dst (AddReductionVD dst src2)); 5731 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5732 format %{ "vaddsd $dst,$dst,$src2\n\t" 5733 "pshufd $tmp,$src2,0xE\n\t" 5734 "vaddsd $dst,$dst,$tmp\n\t" 5735 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5736 "vaddsd $dst,$dst,$tmp2\n\t" 5737 "pshufd $tmp,$tmp2,0xE\n\t" 5738 "vaddsd $dst,$dst,$tmp\n\t" 5739 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5740 "vaddsd $dst,$dst,$tmp2\n\t" 5741 "pshufd $tmp,$tmp2,0xE\n\t" 5742 "vaddsd $dst,$dst,$tmp\n\t" 5743 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5744 "vaddsd $dst,$dst,$tmp2\n\t" 5745 "pshufd $tmp,$tmp2,0xE\n\t" 5746 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5747 ins_encode %{ 5748 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5749 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5750 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5751 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5752 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5753 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5754 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5755 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5756 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5757 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5758 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5759 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5760 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5761 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5762 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5763 %} 5764 ins_pipe( pipe_slow ); 5765 %} 5766 5767 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5768 predicate(UseSSE > 3 && UseAVX == 0); 5769 match(Set dst (MulReductionVI src1 src2)); 5770 effect(TEMP tmp, TEMP tmp2); 5771 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5772 "pmulld $tmp2,$src2\n\t" 5773 "movd $tmp,$src1\n\t" 5774 "pmulld $tmp2,$tmp\n\t" 5775 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5776 ins_encode %{ 5777 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5778 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5779 __ movdl($tmp$$XMMRegister, $src1$$Register); 5780 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5781 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5782 %} 5783 ins_pipe( pipe_slow ); 5784 %} 5785 5786 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5787 predicate(UseAVX > 0); 5788 match(Set dst (MulReductionVI src1 src2)); 5789 effect(TEMP tmp, TEMP tmp2); 5790 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5791 "vpmulld $tmp,$src2,$tmp2\n\t" 5792 "movd $tmp2,$src1\n\t" 5793 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5794 "movd $dst,$tmp2\t! mul reduction2I" %} 5795 ins_encode %{ 5796 int vector_len = 0; 5797 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5798 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5799 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5800 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5801 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5802 %} 5803 ins_pipe( pipe_slow ); 5804 %} 5805 5806 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5807 predicate(UseSSE > 3 && UseAVX == 0); 5808 match(Set dst (MulReductionVI src1 src2)); 5809 effect(TEMP tmp, TEMP tmp2); 5810 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5811 "pmulld $tmp2,$src2\n\t" 5812 "pshufd $tmp,$tmp2,0x1\n\t" 5813 "pmulld $tmp2,$tmp\n\t" 5814 "movd $tmp,$src1\n\t" 5815 "pmulld $tmp2,$tmp\n\t" 5816 "movd $dst,$tmp2\t! mul reduction4I" %} 5817 ins_encode %{ 5818 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5819 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5820 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5821 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5822 __ movdl($tmp$$XMMRegister, $src1$$Register); 5823 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5824 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5825 %} 5826 ins_pipe( pipe_slow ); 5827 %} 5828 5829 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5830 predicate(UseAVX > 0); 5831 match(Set dst (MulReductionVI src1 src2)); 5832 effect(TEMP tmp, TEMP tmp2); 5833 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5834 "vpmulld $tmp,$src2,$tmp2\n\t" 5835 "pshufd $tmp2,$tmp,0x1\n\t" 5836 "vpmulld $tmp,$tmp,$tmp2\n\t" 5837 "movd $tmp2,$src1\n\t" 5838 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5839 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5840 ins_encode %{ 5841 int vector_len = 0; 5842 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5843 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5844 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5845 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5846 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5847 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5848 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5849 %} 5850 ins_pipe( pipe_slow ); 5851 %} 5852 5853 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5854 predicate(UseAVX > 0); 5855 match(Set dst (MulReductionVI src1 src2)); 5856 effect(TEMP tmp, TEMP tmp2); 5857 format %{ "vextracti128_high $tmp,$src2\n\t" 5858 "vpmulld $tmp,$tmp,$src2\n\t" 5859 "pshufd $tmp2,$tmp,0xE\n\t" 5860 "vpmulld $tmp,$tmp,$tmp2\n\t" 5861 "pshufd $tmp2,$tmp,0x1\n\t" 5862 "vpmulld $tmp,$tmp,$tmp2\n\t" 5863 "movd $tmp2,$src1\n\t" 5864 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5865 "movd $dst,$tmp2\t! mul reduction8I" %} 5866 ins_encode %{ 5867 int vector_len = 0; 5868 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5869 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5870 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5871 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5872 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5873 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5874 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5875 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5876 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5877 %} 5878 ins_pipe( pipe_slow ); 5879 %} 5880 5881 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5882 predicate(UseAVX > 2); 5883 match(Set dst (MulReductionVI src1 src2)); 5884 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5885 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5886 "vpmulld $tmp3,$tmp3,$src2\n\t" 5887 "vextracti128_high $tmp,$tmp3\n\t" 5888 "vpmulld $tmp,$tmp,$src2\n\t" 5889 "pshufd $tmp2,$tmp,0xE\n\t" 5890 "vpmulld $tmp,$tmp,$tmp2\n\t" 5891 "pshufd $tmp2,$tmp,0x1\n\t" 5892 "vpmulld $tmp,$tmp,$tmp2\n\t" 5893 "movd $tmp2,$src1\n\t" 5894 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5895 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5896 ins_encode %{ 5897 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5898 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5899 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5900 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5901 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5902 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5903 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5904 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5905 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5906 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5907 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5908 %} 5909 ins_pipe( pipe_slow ); 5910 %} 5911 5912 #ifdef _LP64 5913 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5914 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5915 match(Set dst (MulReductionVL src1 src2)); 5916 effect(TEMP tmp, TEMP tmp2); 5917 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5918 "vpmullq $tmp,$src2,$tmp2\n\t" 5919 "movdq $tmp2,$src1\n\t" 5920 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5921 "movdq $dst,$tmp2\t! mul reduction2L" %} 5922 ins_encode %{ 5923 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5924 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5925 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5926 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5927 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5928 %} 5929 ins_pipe( pipe_slow ); 5930 %} 5931 5932 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5933 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5934 match(Set dst (MulReductionVL src1 src2)); 5935 effect(TEMP tmp, TEMP tmp2); 5936 format %{ "vextracti128_high $tmp,$src2\n\t" 5937 "vpmullq $tmp2,$tmp,$src2\n\t" 5938 "pshufd $tmp,$tmp2,0xE\n\t" 5939 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5940 "movdq $tmp,$src1\n\t" 5941 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5942 "movdq $dst,$tmp2\t! mul reduction4L" %} 5943 ins_encode %{ 5944 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5945 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5946 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5947 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5948 __ movdq($tmp$$XMMRegister, $src1$$Register); 5949 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5950 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5951 %} 5952 ins_pipe( pipe_slow ); 5953 %} 5954 5955 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5956 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5957 match(Set dst (MulReductionVL src1 src2)); 5958 effect(TEMP tmp, TEMP tmp2); 5959 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5960 "vpmullq $tmp2,$tmp2,$src2\n\t" 5961 "vextracti128_high $tmp,$tmp2\n\t" 5962 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5963 "pshufd $tmp,$tmp2,0xE\n\t" 5964 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5965 "movdq $tmp,$src1\n\t" 5966 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5967 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5968 ins_encode %{ 5969 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5970 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5971 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5972 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5973 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5974 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5975 __ movdq($tmp$$XMMRegister, $src1$$Register); 5976 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5977 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5978 %} 5979 ins_pipe( pipe_slow ); 5980 %} 5981 #endif 5982 5983 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5984 predicate(UseSSE >= 1 && UseAVX == 0); 5985 match(Set dst (MulReductionVF dst src2)); 5986 effect(TEMP dst, TEMP tmp); 5987 format %{ "mulss $dst,$src2\n\t" 5988 "pshufd $tmp,$src2,0x01\n\t" 5989 "mulss $dst,$tmp\t! mul reduction2F" %} 5990 ins_encode %{ 5991 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5992 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5993 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5994 %} 5995 ins_pipe( pipe_slow ); 5996 %} 5997 5998 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5999 predicate(UseAVX > 0); 6000 match(Set dst (MulReductionVF dst src2)); 6001 effect(TEMP tmp, TEMP dst); 6002 format %{ "vmulss $dst,$dst,$src2\n\t" 6003 "pshufd $tmp,$src2,0x01\n\t" 6004 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 6005 ins_encode %{ 6006 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6007 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6008 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6009 %} 6010 ins_pipe( pipe_slow ); 6011 %} 6012 6013 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6014 predicate(UseSSE >= 1 && UseAVX == 0); 6015 match(Set dst (MulReductionVF dst src2)); 6016 effect(TEMP dst, TEMP tmp); 6017 format %{ "mulss $dst,$src2\n\t" 6018 "pshufd $tmp,$src2,0x01\n\t" 6019 "mulss $dst,$tmp\n\t" 6020 "pshufd $tmp,$src2,0x02\n\t" 6021 "mulss $dst,$tmp\n\t" 6022 "pshufd $tmp,$src2,0x03\n\t" 6023 "mulss $dst,$tmp\t! mul reduction4F" %} 6024 ins_encode %{ 6025 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 6026 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6027 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 6028 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6029 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 6030 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6031 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 6032 %} 6033 ins_pipe( pipe_slow ); 6034 %} 6035 6036 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6037 predicate(UseAVX > 0); 6038 match(Set dst (MulReductionVF dst src2)); 6039 effect(TEMP tmp, TEMP dst); 6040 format %{ "vmulss $dst,$dst,$src2\n\t" 6041 "pshufd $tmp,$src2,0x01\n\t" 6042 "vmulss $dst,$dst,$tmp\n\t" 6043 "pshufd $tmp,$src2,0x02\n\t" 6044 "vmulss $dst,$dst,$tmp\n\t" 6045 "pshufd $tmp,$src2,0x03\n\t" 6046 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 6047 ins_encode %{ 6048 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6049 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6050 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6051 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6052 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6053 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6054 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6055 %} 6056 ins_pipe( pipe_slow ); 6057 %} 6058 6059 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 6060 predicate(UseAVX > 0); 6061 match(Set dst (MulReductionVF dst src2)); 6062 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6063 format %{ "vmulss $dst,$dst,$src2\n\t" 6064 "pshufd $tmp,$src2,0x01\n\t" 6065 "vmulss $dst,$dst,$tmp\n\t" 6066 "pshufd $tmp,$src2,0x02\n\t" 6067 "vmulss $dst,$dst,$tmp\n\t" 6068 "pshufd $tmp,$src2,0x03\n\t" 6069 "vmulss $dst,$dst,$tmp\n\t" 6070 "vextractf128_high $tmp2,$src2\n\t" 6071 "vmulss $dst,$dst,$tmp2\n\t" 6072 "pshufd $tmp,$tmp2,0x01\n\t" 6073 "vmulss $dst,$dst,$tmp\n\t" 6074 "pshufd $tmp,$tmp2,0x02\n\t" 6075 "vmulss $dst,$dst,$tmp\n\t" 6076 "pshufd $tmp,$tmp2,0x03\n\t" 6077 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 6078 ins_encode %{ 6079 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6080 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6081 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6082 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6083 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6084 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6085 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6086 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6087 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6088 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6089 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6090 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6091 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6092 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6093 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6094 %} 6095 ins_pipe( pipe_slow ); 6096 %} 6097 6098 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 6099 predicate(UseAVX > 2); 6100 match(Set dst (MulReductionVF dst src2)); 6101 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6102 format %{ "vmulss $dst,$dst,$src2\n\t" 6103 "pshufd $tmp,$src2,0x01\n\t" 6104 "vmulss $dst,$dst,$tmp\n\t" 6105 "pshufd $tmp,$src2,0x02\n\t" 6106 "vmulss $dst,$dst,$tmp\n\t" 6107 "pshufd $tmp,$src2,0x03\n\t" 6108 "vmulss $dst,$dst,$tmp\n\t" 6109 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6110 "vmulss $dst,$dst,$tmp2\n\t" 6111 "pshufd $tmp,$tmp2,0x01\n\t" 6112 "vmulss $dst,$dst,$tmp\n\t" 6113 "pshufd $tmp,$tmp2,0x02\n\t" 6114 "vmulss $dst,$dst,$tmp\n\t" 6115 "pshufd $tmp,$tmp2,0x03\n\t" 6116 "vmulss $dst,$dst,$tmp\n\t" 6117 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6118 "vmulss $dst,$dst,$tmp2\n\t" 6119 "pshufd $tmp,$tmp2,0x01\n\t" 6120 "vmulss $dst,$dst,$tmp\n\t" 6121 "pshufd $tmp,$tmp2,0x02\n\t" 6122 "vmulss $dst,$dst,$tmp\n\t" 6123 "pshufd $tmp,$tmp2,0x03\n\t" 6124 "vmulss $dst,$dst,$tmp\n\t" 6125 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6126 "vmulss $dst,$dst,$tmp2\n\t" 6127 "pshufd $tmp,$tmp2,0x01\n\t" 6128 "vmulss $dst,$dst,$tmp\n\t" 6129 "pshufd 
$tmp,$tmp2,0x02\n\t" 6130 "vmulss $dst,$dst,$tmp\n\t" 6131 "pshufd $tmp,$tmp2,0x03\n\t" 6132 "vmulss $dst,$dst,$tmp\t! mul reduction16F" %} 6133 ins_encode %{ 6134 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6135 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6136 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6137 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6138 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6139 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6140 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6141 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6142 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6143 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6144 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6145 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6146 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6147 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6148 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6149 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6150 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6151 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6152 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6153 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6154 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6155 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6156 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6157 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6158 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6159 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6160 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6161 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6162 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6163 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6164 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6165 %} 6166 ins_pipe( pipe_slow ); 6167 %} 6168 6169 instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6170 predicate(UseSSE >= 1 && UseAVX == 0); 6171 match(Set dst (MulReductionVD dst src2)); 6172 effect(TEMP dst, TEMP tmp); 6173 format %{ "mulsd $dst,$src2\n\t" 6174 "pshufd $tmp,$src2,0xE\n\t" 6175 "mulsd $dst,$tmp\t! mul reduction2D" %} 6176 ins_encode %{ 6177 __ mulsd($dst$$XMMRegister, $src2$$XMMRegister); 6178 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6179 __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister); 6180 %} 6181 ins_pipe( pipe_slow ); 6182 %} 6183 6184 instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6185 predicate(UseAVX > 0); 6186 match(Set dst (MulReductionVD dst src2)); 6187 effect(TEMP tmp, TEMP dst); 6188 format %{ "vmulsd $dst,$dst,$src2\n\t" 6189 "pshufd $tmp,$src2,0xE\n\t" 6190 "vmulsd $dst,$dst,$tmp\t! 
mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
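// The 8D multiply reduction below peels the three upper 128-bit lanes off
// the 512-bit source with vextractf32x4 and folds each lane's two doubles
// into $dst with scalar vmulsd (pshufd 0xE brings a lane's upper double
// down to element 0).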
instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsand8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pand $tmp,$src2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp2,$tmp, 0x0\n\t"
            "andb $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x1\n\t"
            "andb $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x2\n\t"
            "andb $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x3\n\t"
            "andb $dst,$tmp2\n\t"
            "movsbl $dst,$dst\t! and reduction8B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ andb($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ andb($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2);
    __ andb($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ andb($dst$$Register, $tmp2$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsand16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pand $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pand $tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "andb $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "andb $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "andb $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "andb $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! 
and reduction16B" %} 6313 ins_encode %{ 6314 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6315 __ pand($tmp$$XMMRegister, $src2$$XMMRegister); 6316 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6317 __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); 6318 __ movzbl($dst$$Register, $src1$$Register); 6319 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6320 __ andb($dst$$Register, $tmp3$$Register); 6321 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 6322 __ andb($dst$$Register, $tmp3$$Register); 6323 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 6324 __ andb($dst$$Register, $tmp3$$Register); 6325 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 6326 __ andb($dst$$Register, $tmp3$$Register); 6327 __ movsbl($dst$$Register, $dst$$Register); 6328 %} 6329 ins_pipe( pipe_slow ); 6330 %} 6331 6332 instruct rvand32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6333 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6334 match(Set dst (AndReductionV src1 src2)); 6335 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6336 format %{ "vextracti128_high $tmp,$src2\n\t" 6337 "vpand $tmp,$tmp,$src2\n\t" 6338 "pshufd $tmp2,$tmp,0xE\n\t" 6339 "vpand $tmp,$tmp,$tmp2\n\t" 6340 "pshufd $tmp2,$tmp,0x1\n\t" 6341 "vpand $tmp,$tmp,$tmp2\n\t" 6342 "movzbl $dst,$src1\n\t" 6343 "pextrb $tmp3,$tmp, 0x0\n\t" 6344 "andb $dst,$tmp3\n\t" 6345 "pextrb $tmp3,$tmp, 0x1\n\t" 6346 "andb $dst,$tmp3\n\t" 6347 "pextrb $tmp3,$tmp, 0x2\n\t" 6348 "andb $dst,$tmp3\n\t" 6349 "pextrb $tmp3,$tmp, 0x3\n\t" 6350 "andb $dst,$tmp3\n\t" 6351 "movsbl $dst,$dst\t! and reduction32B" %} 6352 ins_encode %{ 6353 int vector_len = 0; 6354 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6355 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 6356 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 6357 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6358 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 6359 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6360 __ movzbl($dst$$Register, $src1$$Register); 6361 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6362 __ andb($dst$$Register, $tmp3$$Register); 6363 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 6364 __ andb($dst$$Register, $tmp3$$Register); 6365 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 6366 __ andb($dst$$Register, $tmp3$$Register); 6367 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 6368 __ andb($dst$$Register, $tmp3$$Register); 6369 __ movsbl($dst$$Register, $dst$$Register); 6370 %} 6371 ins_pipe( pipe_slow ); 6372 %} 6373 6374 instruct rvand64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6375 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6376 match(Set dst (AndReductionV src1 src2)); 6377 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6378 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6379 "vpand $tmp2,$tmp2,$src2\n\t" 6380 "vextracti128_high $tmp,$tmp2\n\t" 6381 "vpand $tmp,$tmp,$tmp2\n\t" 6382 "pshufd $tmp2,$tmp,0xE\n\t" 6383 "vpand $tmp,$tmp,$tmp2\n\t" 6384 "pshufd $tmp2,$tmp,0x1\n\t" 6385 "vpand $tmp,$tmp,$tmp2\n\t" 6386 "movzbl $dst,$src1\n\t" 6387 "movdl $tmp3,$tmp\n\t" 6388 "andb $dst,$tmp3\n\t" 6389 "shrl $tmp3,0x8\n\t" 6390 "andb $dst,$tmp3\n\t" 6391 "shrl $tmp3,0x8\n\t" 6392 "andb $dst,$tmp3\n\t" 6393 "shrl 
$tmp3,0x8\n\t"
            "andb $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! and reduction64B" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    __ movdl($tmp3$$Register, $tmp$$XMMRegister);
    __ andb($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andb($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andb($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andb($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsand4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pand $tmp,$src2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp2,$tmp, 0x0\n\t"
            "andw $dst,$tmp2\n\t"
            "pextrw $tmp2,$tmp, 0x1\n\t"
            "andw $dst,$tmp2\n\t"
            "movswl $dst,$dst\t! and reduction4S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp2$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp2$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
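// Wider short and-reductions narrow the vector first: the 8S form folds
// the high 64 bits onto the low half (pshufd 0xE + pand), then reuses the
// same pextrw/andw tail as the 4S form above.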
instruct rsand8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pand $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pand $tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "andw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! and reduction8S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvand16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpand $tmp,$tmp,$src2\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "andw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! and reduction16S" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvand32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpand $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "andw $dst,$tmp3\n\t"
            "shrl $tmp3,0x10\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! 
and reduction32S" %} 6527 ins_encode %{ 6528 int vector_len = 0; 6529 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6530 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6531 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6532 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6533 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 6534 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6535 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6536 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6537 __ movzwl($dst$$Register, $src1$$Register); 6538 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 6539 __ andw($dst$$Register, $tmp3$$Register); 6540 __ shrl($tmp3$$Register, 16); 6541 __ andw($dst$$Register, $tmp3$$Register); 6542 __ movswl($dst$$Register, $dst$$Register); 6543 %} 6544 ins_pipe( pipe_slow ); 6545 %} 6546 6547 instruct rsand2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 6548 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6549 match(Set dst (AndReductionV src1 src2)); 6550 effect(TEMP tmp, TEMP tmp2); 6551 format %{ "pshufd $tmp2,$src2,0x1\n\t" 6552 "pand $tmp2,$src2\n\t" 6553 "movd $tmp,$src1\n\t" 6554 "pand $tmp2,$tmp\n\t" 6555 "movd $dst,$tmp2\t! and reduction2I" %} 6556 ins_encode %{ 6557 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6558 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 6559 __ movdl($tmp$$XMMRegister, $src1$$Register); 6560 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 6561 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6562 %} 6563 ins_pipe( pipe_slow ); 6564 %} 6565 6566 instruct rsand4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 6567 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6568 match(Set dst (AndReductionV src1 src2)); 6569 effect(TEMP tmp, TEMP tmp2); 6570 format %{ "pshufd $tmp2,$src2,0xE\n\t" 6571 "pand $tmp2,$src2\n\t" 6572 "pshufd $tmp,$tmp2,0x1\n\t" 6573 "pand $tmp2,$tmp\n\t" 6574 "movd $tmp,$src1\n\t" 6575 "pand $tmp2,$tmp\n\t" 6576 "movd $dst,$tmp2\t! and reduction4I" %} 6577 ins_encode %{ 6578 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 6579 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 6580 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 6581 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 6582 __ movdl($tmp$$XMMRegister, $src1$$Register); 6583 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 6584 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6585 %} 6586 ins_pipe( pipe_slow ); 6587 %} 6588 6589 instruct rvand8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 6590 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6591 match(Set dst (AndReductionV src1 src2)); 6592 effect(TEMP tmp, TEMP tmp2); 6593 format %{ "vextracti128_high $tmp,$src2\n\t" 6594 "vpand $tmp,$tmp,$src2\n\t" 6595 "vpshufd $tmp2,$tmp,0xE\n\t" 6596 "vpand $tmp,$tmp,$tmp2\n\t" 6597 "vpshufd $tmp2,$tmp,0x1\n\t" 6598 "vpand $tmp,$tmp,$tmp2\n\t" 6599 "movd $tmp2,$src1\n\t" 6600 "vpand $tmp2,$tmp,$tmp2\n\t" 6601 "movd $dst,$tmp2\t! 
and reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvand16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpand $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpand $tmp,$tmp,$tmp3\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpand $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! and reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, 0);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, 0);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rsand2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pand $tmp2,$src2\n\t"
            "movdq $tmp,$src1\n\t"
            "pand $tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! 
and reduction2L" %} 6658 ins_encode %{ 6659 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 6660 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 6661 __ movdq($tmp$$XMMRegister, $src1$$Register); 6662 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 6663 __ movq($dst$$Register, $tmp2$$XMMRegister); 6664 %} 6665 ins_pipe( pipe_slow ); 6666 %} 6667 6668 instruct rvand4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 6669 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 6670 match(Set dst (AndReductionV src1 src2)); 6671 effect(TEMP tmp, TEMP tmp2); 6672 format %{ "vextracti128_high $tmp,$src2\n\t" 6673 "vpand $tmp2,$tmp,$src2\n\t" 6674 "vpshufd $tmp,$tmp2,0xE\n\t" 6675 "vpand $tmp2,$tmp2,$tmp\n\t" 6676 "movq $tmp,$src1\n\t" 6677 "vpand $tmp2,$tmp2,$tmp\n\t" 6678 "movq $dst,$tmp2\t! and reduction4L" %} 6679 ins_encode %{ 6680 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6681 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 6682 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); 6683 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6684 __ movq($tmp$$XMMRegister, $src1$$Register); 6685 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6686 __ movq($dst$$Register, $tmp2$$XMMRegister); 6687 %} 6688 ins_pipe( pipe_slow ); 6689 %} 6690 6691 instruct rvand8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 6692 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 6693 match(Set dst (AndReductionV src1 src2)); 6694 effect(TEMP tmp, TEMP tmp2); 6695 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6696 "vpandq $tmp2,$tmp2,$src2\n\t" 6697 "vextracti128_high $tmp,$tmp2\n\t" 6698 "vpandq $tmp2,$tmp2,$tmp\n\t" 6699 "vpshufd $tmp,$tmp2,0xE\n\t" 6700 "vpandq $tmp2,$tmp2,$tmp\n\t" 6701 "movdq $tmp,$src1\n\t" 6702 "vpandq $tmp2,$tmp2,$tmp\n\t" 6703 "movdq $dst,$tmp2\t! and reduction8L" %} 6704 ins_encode %{ 6705 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6706 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6707 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6708 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6709 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); 6710 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6711 __ movdq($tmp$$XMMRegister, $src1$$Register); 6712 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6713 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6714 %} 6715 ins_pipe( pipe_slow ); 6716 %} 6717 #endif 6718 6719 instruct rsor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 6720 predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT); 6721 match(Set dst (OrReductionV src1 src2)); 6722 effect(TEMP tmp, TEMP tmp2); 6723 format %{ "pshufd $tmp2,$src2,0x1\n\t" 6724 "por $tmp2,$src2\n\t" 6725 "movd $tmp,$src1\n\t" 6726 "por $tmp2,$tmp\n\t" 6727 "movd $dst,$tmp2\t! 
or reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ por($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ por($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "por $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "por $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "por $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! or reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ por($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ por($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ por($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpor $tmp,$tmp,$src2\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpor $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! or reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
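// For 512-bit sources the high 256 bits are folded in first with
// vextracti64x4_high; the remainder matches the 8I tail above.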
instruct rvor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpor $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpor $tmp,$tmp,$tmp3\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpor $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! or reduction16I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 2 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "por $tmp2,$src2\n\t"
            "movdq $tmp,$src1\n\t"
            "por $tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! or reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ por($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ por($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpor $tmp2,$tmp,$src2\n\t"
            "vpshufd $tmp,$tmp2,0xE\n\t"
            "vpor $tmp2,$tmp2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpor $tmp2,$tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! or reduction4L" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len);
    __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movq($tmp$$XMMRegister, $src1$$Register);
    __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
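// The 8L or-reduction moves the scalar through 64-bit movdq, so it is
// only available on LP64 builds.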
#ifdef _LP64
instruct rvor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vporq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vporq $tmp2,$tmp2,$tmp\n\t"
            "vpshufd $tmp,$tmp2,0xE\n\t"
            "vporq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vporq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! or reduction8L" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len);
    __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct rsxor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pxor $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pxor $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! xor reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsxor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pxor $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pxor $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pxor $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! xor reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
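// In effect a XorReductionV computes dst = src1 ^ v[0] ^ ... ^ v[n-1]:
// the lanes are xor-folded pairwise (shuffle the high half down, then
// vpxor) until a single element remains, which is combined with $src1.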
instruct rvxor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpxor $tmp,$tmp,$src2\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpxor $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! xor reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvxor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_INT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpxor $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpxor $tmp,$tmp,$tmp3\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpxor $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! xor reduction16I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpxor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
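// Long xor-reductions mirror the or/and forms: the 2L variant needs only
// one pshufd 0xE to bring the high long down before the scalar combine.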
instruct rsxor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 2 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pxor $tmp2,$src2\n\t"
            "movdq $tmp,$src1\n\t"
            "pxor $tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! xor reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvxor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpxor $tmp2,$tmp,$src2\n\t"
            "vpshufd $tmp,$tmp2,0xE\n\t"
            "vpxor $tmp2,$tmp2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpxor $tmp2,$tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! xor reduction4L" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len);
    __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movq($tmp$$XMMRegister, $src1$$Register);
    __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct rvxor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->basic_type() == T_LONG);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpxorq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpxorq $tmp2,$tmp2,$tmp\n\t"
            "vpshufd $tmp,$tmp2,0xE\n\t"
            "vpxorq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpxorq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! xor reduction8L" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len);
    __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
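// Each element size below comes in several flavors: a two-operand SSE
// form (UseAVX == 0), a three-operand AVX form (supports_avxonly), an
// EVEX form (supports_avx512bw for byte/short ops), and a *_special form
// for AVX-512 chips without BW support (supports_avx512nobw), where the
// add must stay VEX-encoded and dst doubles as the first source.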
// Bytes vector add
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
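// The vector_len argument of the assembler emitters selects the encoded
// vector length: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit (compare the 32B
// and 64B adds below, which pass 1 and 2).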
instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
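// In effect, a 16-byte AddVB is dst[i] = src1[i] + src2[i] for i < 16;
// every form below emits a single paddb/vpaddb over a vecX operand.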
instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
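// 256-bit byte adds: AVX/AVX2-only chips take the supports_avx256only
// forms, AVX-512 BW chips the EVEX forms, and BW-less AVX-512 chips the
// *_special VEX fallbacks.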

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
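
// Illustrative only (not part of this file): assuming -XX:+UseSuperWord,
// C2's SuperWord pass turns a simple counted loop like the Java below
// into AddVB ideal nodes, which then match one of the vadd*B rules above
// according to the CPU features and the chosen vector width:
//
//   byte[] a = ..., b = ..., c = ...;      // equal-length arrays
//   for (int i = 0; i < a.length; i++) {
//     c[i] = (byte) (a[i] + b[i]);         // becomes packed byte adds
//   }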

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
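
// Conventions used throughout these rules: operand classes vecS, vecD,
// vecX, vecY and vecZ stand for 32-, 64-, 128- (XMM), 256- (YMM) and
// 512-bit (ZMM) vectors, and vector_len values 0/1/2 select the
// 128/256/512-bit encodings (Assembler::AVX_128bit/AVX_256bit/AVX_512bit).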

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
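
// The *_mem rules match the vector load folded into the add, e.g.
// (AddVS src (LoadVector mem)), so a single instruction with a memory
// operand is emitted instead of a separate load followed by an add.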

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
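// Unlike the byte/short rules above, packed int adds need no
// avx512bw/avx512nobw split: vpaddd has an EVEX form in base AVX-512F,
// so plain UseAVX > 2 covers the 512-bit case. The 256-bit integer rules
// require UseAVX > 1 (AVX2), since AVX1 only widened floating-point
// operations to 256 bits.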
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
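// Packed long adds follow the same pattern: paddq dates back to SSE2,
// the 256-bit vpaddq needs AVX2 (UseAVX > 1), and the 512-bit form is
// part of AVX-512F (UseAVX > 2).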
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
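// 256-bit floating-point adds already exist in AVX1, which is why the
// 8F rules below ask only for UseAVX > 0, while the 512-bit vaddps
// requires AVX-512F (UseAVX > 2).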
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
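// The double rules mirror the float rules with half the lane count per
// register width, using addpd (SSE2) and vaddpd.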
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------
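// The subtract rules below mirror the add rules one-for-one (psubb,
// vpsubb, and so on). Subtraction is not commutative, so operand order
// is significant: the result is src1 - src2 (or dst - src for the
// destructive SSE forms).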
// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
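
// Note that these rules use the wrapping (modular) forms, psubb/paddb
// rather than the saturating psubsb/paddsb, which lines up with Java's
// wrap-around two's-complement arithmetic on the narrowed element types.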

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
sub packed8L" %} 8915 ins_encode %{ 8916 int vector_len = 2; 8917 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8918 %} 8919 ins_pipe( pipe_slow ); 8920 %} 8921 8922 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 8923 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 8924 match(Set dst (SubVL src (LoadVector mem))); 8925 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 8926 ins_encode %{ 8927 int vector_len = 2; 8928 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8929 %} 8930 ins_pipe( pipe_slow ); 8931 %} 8932 8933 // Floats vector sub 8934 instruct vsub2F(vecD dst, vecD src) %{ 8935 predicate(n->as_Vector()->length() == 2); 8936 match(Set dst (SubVF dst src)); 8937 format %{ "subps $dst,$src\t! sub packed2F" %} 8938 ins_encode %{ 8939 __ subps($dst$$XMMRegister, $src$$XMMRegister); 8940 %} 8941 ins_pipe( pipe_slow ); 8942 %} 8943 8944 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 8945 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8946 match(Set dst (SubVF src1 src2)); 8947 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 8948 ins_encode %{ 8949 int vector_len = 0; 8950 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8951 %} 8952 ins_pipe( pipe_slow ); 8953 %} 8954 8955 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 8956 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 8957 match(Set dst (SubVF src (LoadVector mem))); 8958 format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} 8959 ins_encode %{ 8960 int vector_len = 0; 8961 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8962 %} 8963 ins_pipe( pipe_slow ); 8964 %} 8965 8966 instruct vsub4F(vecX dst, vecX src) %{ 8967 predicate(n->as_Vector()->length() == 4); 8968 match(Set dst (SubVF dst src)); 8969 format %{ "subps $dst,$src\t! sub packed4F" %} 8970 ins_encode %{ 8971 __ subps($dst$$XMMRegister, $src$$XMMRegister); 8972 %} 8973 ins_pipe( pipe_slow ); 8974 %} 8975 8976 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 8977 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8978 match(Set dst (SubVF src1 src2)); 8979 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 8980 ins_encode %{ 8981 int vector_len = 0; 8982 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 8983 %} 8984 ins_pipe( pipe_slow ); 8985 %} 8986 8987 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 8988 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 8989 match(Set dst (SubVF src (LoadVector mem))); 8990 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 8991 ins_encode %{ 8992 int vector_len = 0; 8993 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 8994 %} 8995 ins_pipe( pipe_slow ); 8996 %} 8997 8998 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 8999 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9000 match(Set dst (SubVF src1 src2)); 9001 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 9002 ins_encode %{ 9003 int vector_len = 1; 9004 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9005 %} 9006 ins_pipe( pipe_slow ); 9007 %} 9008 9009 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 9010 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9011 match(Set dst (SubVF src (LoadVector mem))); 9012 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %} 9013 ins_encode %{ 9014 int vector_len = 1; 9015 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9016 %} 9017 ins_pipe( pipe_slow ); 9018 %} 9019 9020 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9021 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9022 match(Set dst (SubVF src1 src2)); 9023 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 9024 ins_encode %{ 9025 int vector_len = 2; 9026 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9027 %} 9028 ins_pipe( pipe_slow ); 9029 %} 9030 9031 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 9032 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9033 match(Set dst (SubVF src (LoadVector mem))); 9034 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 9035 ins_encode %{ 9036 int vector_len = 2; 9037 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9038 %} 9039 ins_pipe( pipe_slow ); 9040 %} 9041 9042 // Doubles vector sub 9043 instruct vsub2D(vecX dst, vecX src) %{ 9044 predicate(n->as_Vector()->length() == 2); 9045 match(Set dst (SubVD dst src)); 9046 format %{ "subpd $dst,$src\t! sub packed2D" %} 9047 ins_encode %{ 9048 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 9049 %} 9050 ins_pipe( pipe_slow ); 9051 %} 9052 9053 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 9054 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9055 match(Set dst (SubVD src1 src2)); 9056 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 9057 ins_encode %{ 9058 int vector_len = 0; 9059 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9060 %} 9061 ins_pipe( pipe_slow ); 9062 %} 9063 9064 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 9065 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9066 match(Set dst (SubVD src (LoadVector mem))); 9067 format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %} 9068 ins_encode %{ 9069 int vector_len = 0; 9070 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9071 %} 9072 ins_pipe( pipe_slow ); 9073 %} 9074 9075 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 9076 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9077 match(Set dst (SubVD src1 src2)); 9078 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 9079 ins_encode %{ 9080 int vector_len = 1; 9081 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9082 %} 9083 ins_pipe( pipe_slow ); 9084 %} 9085 9086 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 9087 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9088 match(Set dst (SubVD src (LoadVector mem))); 9089 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 9090 ins_encode %{ 9091 int vector_len = 1; 9092 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9093 %} 9094 ins_pipe( pipe_slow ); 9095 %} 9096 9097 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9098 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9099 match(Set dst (SubVD src1 src2)); 9100 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 9101 ins_encode %{ 9102 int vector_len = 2; 9103 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9104 %} 9105 ins_pipe( pipe_slow ); 9106 %} 9107 9108 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 9109 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9110 match(Set dst (SubVD src (LoadVector mem))); 9111 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Byte vector mul

instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp2, vecS tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $tmp2,$src2\n\t"
           "pmullw $tmp,$tmp2\n\t"
           "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp,$tmp2\n\t"
           "packuswb $tmp,$tmp\n\t"
           "movss $dst,$tmp\t! mul packed4B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_byte_saturationmask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp2, vecD tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp2, TEMP tmp);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $tmp2,$src2\n\t"
           "pmullw $tmp,$tmp2\n\t"
           "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp,$tmp2\n\t"
           "packuswb $tmp,$tmp\n\t"
           "movsd $dst,$tmp\t! mul packed8B" %}
  ins_encode %{
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_byte_saturationmask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
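// There is no byte-granularity SIMD multiply on x86, so the MulVB rules above
// widen each byte to a 16-bit word (pmovsxbw), multiply words (pmullw), mask
// each word down to its low byte (pand with the repeating 0x00ff pattern), and
// repack (packuswb; after masking every word is <= 0xff, so the unsigned
// saturation never fires). A minimal scalar sketch of the per-lane semantics;
// mulvb_lane is a hypothetical helper, not part of this file:
//
//   #include <stdint.h>
//
//   static inline uint8_t mulvb_lane(int8_t a, int8_t b) {
//     int16_t wide = (int16_t)a * (int16_t)b;  // pmovsxbw + pmullw
//     return (uint8_t)(wide & 0x00ff);         // pand mask; packuswb repacks
//   }
//
// The 16-byte SSE variant below applies the same trick twice, once per 8-byte
// half (the pshufd immediate 238 = 0xEE moves the high half down first).

instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp3, vecX tmp2, vecX tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP tmp3, TEMP tmp2, TEMP tmp);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $tmp2,$src2\n\t"
           "pmullw $tmp,$tmp2\n\t"
           "pshufd $tmp2,$src1\n\t"
           "pshufd $tmp3,$src2\n\t"
           "pmovsxbw $tmp2,$tmp2\n\t"
           "pmovsxbw $tmp3,$tmp3\n\t"
           "pmullw $tmp2,$tmp3\n\t"
           "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp,$tmp3\n\t"
           "pand $tmp2,$tmp3\n\t"
           "packuswb $tmp,$tmp2\n\t"
           "movdqu $dst,$tmp \n\t!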
mul packed16B" %} 9186 ins_encode %{ 9187 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 9188 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 9189 __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); 9190 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 238); 9191 __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 238); 9192 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 9193 __ pmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister); 9194 __ pmullw($tmp2$$XMMRegister, $tmp3$$XMMRegister); 9195 __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_byte_saturationmask())); 9196 __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister); 9197 __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister); 9198 __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister); 9199 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 9200 %} 9201 ins_pipe( pipe_slow ); 9202 %} 9203 9204 instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecY tmp2, vecY tmp) %{ 9205 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 9206 match(Set dst (MulVB src1 src2)); 9207 effect(TEMP dst, TEMP tmp2, TEMP tmp); 9208 format %{"vpmovsxbw $tmp,$src1\n\t" 9209 "vpmovsxbw $tmp2,$src2\n\t" 9210 "vpmullw $tmp,$tmp2\n\t" 9211 "vmovdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" 9212 "vpand $tmp,$tmp2\n\t" 9213 "vextracti128_high $tmp2,$tmp\n\t" 9214 "vpackuswb $dst,$tmp, $tmp2\n\t! mul packed16B" %} 9215 ins_encode %{ 9216 int vector_len = 1; 9217 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 9218 __ vpmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 9219 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9220 __ vmovdqu($tmp2$$XMMRegister, ExternalAddress(vector_byte_saturationmask())); 9221 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9222 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 9223 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 9224 %} 9225 ins_pipe( pipe_slow ); 9226 %} 9227 9228 instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, vecY tmp3) %{ 9229 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 9230 match(Set dst (MulVB src1 src2)); 9231 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3); 9232 format %{"vextracti128_high $tmp1,$src1\n\t" 9233 "vextracti128_high $tmp3,$src2\n\t" 9234 "vpmovsxbw $tmp1,$tmp1\n\t" 9235 "vpmovsxbw $tmp3,$tmp3\n\t" 9236 "vpmullw $tmp1,$tmp1,$tmp3\n\t" 9237 "vpmovsxbw $tmp2,$src1\n\t" 9238 "vpmovsxbw $tmp3,$src2\n\t" 9239 "vpmullw $tmp2,$tmp2,$tmp3\n\t" 9240 "vmovdqu $tmp3, [0x00ff00ff0x00ff00ff]\n\t" 9241 "vpbroadcastd $tmp3, $tmp3\n\t" 9242 "vpand $tmp2,$tmp2,$tmp3\n\t" 9243 "vpand $tmp1,$tmp1,$tmp3\n\t" 9244 "vpackuswb $dst,$tmp2,$tmp1\n\t" 9245 "vpermq $dst, $dst, 0xD8\t! 
mul packed32B" %} 9246 ins_encode %{ 9247 int vector_len = 1; 9248 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 9249 __ vextracti128_high($tmp3$$XMMRegister, $src2$$XMMRegister); 9250 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 9251 __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); 9252 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); 9253 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 9254 __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); 9255 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 9256 __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_byte_saturationmask())); 9257 __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister); 9258 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); 9259 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 9260 __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len); 9261 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 9262 %} 9263 ins_pipe( pipe_slow ); 9264 %} 9265 9266 // Shorts/Chars vector mul 9267 instruct vmul2S(vecS dst, vecS src) %{ 9268 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9269 match(Set dst (MulVS dst src)); 9270 format %{ "pmullw $dst,$src\t! mul packed2S" %} 9271 ins_encode %{ 9272 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 9273 %} 9274 ins_pipe( pipe_slow ); 9275 %} 9276 9277 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 9278 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9279 match(Set dst (MulVS src1 src2)); 9280 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 9281 ins_encode %{ 9282 int vector_len = 0; 9283 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9284 %} 9285 ins_pipe( pipe_slow ); 9286 %} 9287 9288 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 9289 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9290 match(Set dst (MulVS src1 src2)); 9291 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 9292 ins_encode %{ 9293 int vector_len = 0; 9294 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9295 %} 9296 ins_pipe( pipe_slow ); 9297 %} 9298 9299 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 9300 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9301 match(Set dst (MulVS dst src2)); 9302 effect(TEMP src1); 9303 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 9304 ins_encode %{ 9305 int vector_len = 0; 9306 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9307 %} 9308 ins_pipe( pipe_slow ); 9309 %} 9310 9311 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 9312 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9313 match(Set dst (MulVS src (LoadVector mem))); 9314 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 9315 ins_encode %{ 9316 int vector_len = 0; 9317 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9318 %} 9319 ins_pipe( pipe_slow ); 9320 %} 9321 9322 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 9323 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9324 match(Set dst (MulVS src (LoadVector mem))); 9325 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed2S" %} 9326 ins_encode %{ 9327 int vector_len = 0; 9328 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9329 %} 9330 ins_pipe( pipe_slow ); 9331 %} 9332 9333 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 9334 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9335 match(Set dst (MulVS dst (LoadVector mem))); 9336 effect(TEMP src); 9337 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 9338 ins_encode %{ 9339 int vector_len = 0; 9340 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9341 %} 9342 ins_pipe( pipe_slow ); 9343 %} 9344 9345 instruct vmul4S(vecD dst, vecD src) %{ 9346 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9347 match(Set dst (MulVS dst src)); 9348 format %{ "pmullw $dst,$src\t! mul packed4S" %} 9349 ins_encode %{ 9350 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 9351 %} 9352 ins_pipe( pipe_slow ); 9353 %} 9354 9355 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 9356 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9357 match(Set dst (MulVS src1 src2)); 9358 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 9359 ins_encode %{ 9360 int vector_len = 0; 9361 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9362 %} 9363 ins_pipe( pipe_slow ); 9364 %} 9365 9366 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 9367 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9368 match(Set dst (MulVS src1 src2)); 9369 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 9370 ins_encode %{ 9371 int vector_len = 0; 9372 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9373 %} 9374 ins_pipe( pipe_slow ); 9375 %} 9376 9377 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 9378 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9379 match(Set dst (MulVS dst src2)); 9380 effect(TEMP src1); 9381 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 9382 ins_encode %{ 9383 int vector_len = 0; 9384 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9385 %} 9386 ins_pipe( pipe_slow ); 9387 %} 9388 9389 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 9390 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9391 match(Set dst (MulVS src (LoadVector mem))); 9392 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 9393 ins_encode %{ 9394 int vector_len = 0; 9395 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9396 %} 9397 ins_pipe( pipe_slow ); 9398 %} 9399 9400 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 9401 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9402 match(Set dst (MulVS src (LoadVector mem))); 9403 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 9404 ins_encode %{ 9405 int vector_len = 0; 9406 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9407 %} 9408 ins_pipe( pipe_slow ); 9409 %} 9410 9411 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 9412 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9413 match(Set dst (MulVS dst (LoadVector mem))); 9414 effect(TEMP src); 9415 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} 9416 ins_encode %{ 9417 int vector_len = 0; 9418 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9419 %} 9420 ins_pipe( pipe_slow ); 9421 %} 9422 9423 instruct vmul8S(vecX dst, vecX src) %{ 9424 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9425 match(Set dst (MulVS dst src)); 9426 format %{ "pmullw $dst,$src\t! mul packed8S" %} 9427 ins_encode %{ 9428 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 9429 %} 9430 ins_pipe( pipe_slow ); 9431 %} 9432 9433 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 9434 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9435 match(Set dst (MulVS src1 src2)); 9436 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 9437 ins_encode %{ 9438 int vector_len = 0; 9439 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9440 %} 9441 ins_pipe( pipe_slow ); 9442 %} 9443 9444 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 9445 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9446 match(Set dst (MulVS src1 src2)); 9447 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 9448 ins_encode %{ 9449 int vector_len = 0; 9450 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9451 %} 9452 ins_pipe( pipe_slow ); 9453 %} 9454 9455 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 9456 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9457 match(Set dst (MulVS dst src2)); 9458 effect(TEMP src1); 9459 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 9460 ins_encode %{ 9461 int vector_len = 0; 9462 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9463 %} 9464 ins_pipe( pipe_slow ); 9465 %} 9466 9467 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 9468 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9469 match(Set dst (MulVS src (LoadVector mem))); 9470 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 9471 ins_encode %{ 9472 int vector_len = 0; 9473 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9474 %} 9475 ins_pipe( pipe_slow ); 9476 %} 9477 9478 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 9479 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9480 match(Set dst (MulVS src (LoadVector mem))); 9481 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 9482 ins_encode %{ 9483 int vector_len = 0; 9484 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9485 %} 9486 ins_pipe( pipe_slow ); 9487 %} 9488 9489 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 9490 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9491 match(Set dst (MulVS dst (LoadVector mem))); 9492 effect(TEMP src); 9493 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 9494 ins_encode %{ 9495 int vector_len = 0; 9496 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9497 %} 9498 ins_pipe( pipe_slow ); 9499 %} 9500 9501 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 9502 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9503 match(Set dst (MulVS src1 src2)); 9504 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %} 9505 ins_encode %{ 9506 int vector_len = 1; 9507 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9508 %} 9509 ins_pipe( pipe_slow ); 9510 %} 9511 9512 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 9513 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9514 match(Set dst (MulVS src1 src2)); 9515 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 9516 ins_encode %{ 9517 int vector_len = 1; 9518 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9519 %} 9520 ins_pipe( pipe_slow ); 9521 %} 9522 9523 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 9524 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9525 match(Set dst (MulVS dst src2)); 9526 effect(TEMP src1); 9527 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 9528 ins_encode %{ 9529 int vector_len = 1; 9530 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9531 %} 9532 ins_pipe( pipe_slow ); 9533 %} 9534 9535 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 9536 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9537 match(Set dst (MulVS src (LoadVector mem))); 9538 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 9539 ins_encode %{ 9540 int vector_len = 1; 9541 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9542 %} 9543 ins_pipe( pipe_slow ); 9544 %} 9545 9546 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 9547 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9548 match(Set dst (MulVS src (LoadVector mem))); 9549 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 9550 ins_encode %{ 9551 int vector_len = 1; 9552 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9553 %} 9554 ins_pipe( pipe_slow ); 9555 %} 9556 9557 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 9558 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9559 match(Set dst (MulVS dst (LoadVector mem))); 9560 effect(TEMP src); 9561 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 9562 ins_encode %{ 9563 int vector_len = 1; 9564 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9565 %} 9566 ins_pipe( pipe_slow ); 9567 %} 9568 9569 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9570 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9571 match(Set dst (MulVS src1 src2)); 9572 format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %} 9573 ins_encode %{ 9574 int vector_len = 2; 9575 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9576 %} 9577 ins_pipe( pipe_slow ); 9578 %} 9579 9580 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 9581 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9582 match(Set dst (MulVS src (LoadVector mem))); 9583 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 9584 ins_encode %{ 9585 int vector_len = 2; 9586 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9587 %} 9588 ins_pipe( pipe_slow ); 9589 %} 9590 9591 // Integers vector mul (sse4_1) 9592 instruct vmul2I(vecD dst, vecD src) %{ 9593 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 9594 match(Set dst (MulVI dst src)); 9595 format %{ "pmulld $dst,$src\t! 
mul packed2I" %} 9596 ins_encode %{ 9597 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 9598 %} 9599 ins_pipe( pipe_slow ); 9600 %} 9601 9602 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 9603 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9604 match(Set dst (MulVI src1 src2)); 9605 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 9606 ins_encode %{ 9607 int vector_len = 0; 9608 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9609 %} 9610 ins_pipe( pipe_slow ); 9611 %} 9612 9613 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 9614 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9615 match(Set dst (MulVI src (LoadVector mem))); 9616 format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} 9617 ins_encode %{ 9618 int vector_len = 0; 9619 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9620 %} 9621 ins_pipe( pipe_slow ); 9622 %} 9623 9624 instruct vmul4I(vecX dst, vecX src) %{ 9625 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 9626 match(Set dst (MulVI dst src)); 9627 format %{ "pmulld $dst,$src\t! mul packed4I" %} 9628 ins_encode %{ 9629 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 9630 %} 9631 ins_pipe( pipe_slow ); 9632 %} 9633 9634 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 9635 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9636 match(Set dst (MulVI src1 src2)); 9637 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 9638 ins_encode %{ 9639 int vector_len = 0; 9640 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9641 %} 9642 ins_pipe( pipe_slow ); 9643 %} 9644 9645 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 9646 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9647 match(Set dst (MulVI src (LoadVector mem))); 9648 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 9649 ins_encode %{ 9650 int vector_len = 0; 9651 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9652 %} 9653 ins_pipe( pipe_slow ); 9654 %} 9655 9656 // Long vector mul 9657 9658 instruct mul2L_reg(vecX dst, vecX src2, vecX tmp) %{ 9659 predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && VM_Version::supports_sse4_1()); 9660 match(Set dst (MulVL dst src2)); 9661 effect(TEMP dst, TEMP tmp); 9662 format %{ "pshufd $tmp,$src2, 177\n\t" 9663 "pmulld $tmp,$dst\n\t" 9664 "phaddd $tmp,$tmp\n\t" 9665 "pmovzxdq $tmp,$tmp\n\t" 9666 "psllq $tmp, 32\n\t" 9667 "pmuludq $dst,$src2\n\t" 9668 "paddq $dst,$tmp\n\t! mul packed2L" %} 9669 9670 ins_encode %{ 9671 int vector_len = 0; 9672 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); 9673 __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); 9674 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 9675 __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); 9676 __ psllq($tmp$$XMMRegister, 32); 9677 __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); 9678 __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); 9679 %} 9680 ins_pipe( pipe_slow ); 9681 %} 9682 9683 instruct vmul2L_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp) %{ 9684 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && VM_Version::supports_avx()); 9685 match(Set dst (MulVL src1 src2)); 9686 effect(TEMP tmp1, TEMP tmp); 9687 format %{ "vpshufd $tmp,$src2\n\t" 9688 "vpmulld $tmp,$src1,$tmp\n\t" 9689 "vphaddd $tmp,$tmp,$tmp\n\t" 9690 "vpmovzxdq $tmp,$tmp\n\t" 9691 "vpsllq $tmp,$tmp\n\t" 9692 "vpmuludq $tmp1,$src1,$src2\n\t" 9693 "vpaddq $dst,$tmp,$tmp1\t! 
mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp, vecY tmp1) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && VM_Version::supports_avx2());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len);
    __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len);
    __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
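// The *_reg_avx rules above synthesize a 64x64->64-bit lane multiply out of
// 32-bit primitives, because a packed quadword multiply (vpmullq) only exists
// with AVX512DQ: vpmuludq forms lo(a)*lo(b), while the vpshufd/vpmulld/
// vphaddd/vpsllq chain builds the shifted cross terms. A per-lane scalar
// sketch; mulvl_lane is a hypothetical helper, not part of this file:
//
//   #include <stdint.h>
//
//   static inline uint64_t mulvl_lane(uint64_t a, uint64_t b) {
//     uint64_t lo_a = (uint32_t)a, hi_a = a >> 32;
//     uint64_t lo_b = (uint32_t)b, hi_b = b >> 32;
//     // hi_a*hi_b shifts entirely out of a 64-bit result, so only three
//     // partial products remain:
//     uint64_t cross = (lo_a * hi_b + hi_a * lo_b) << 32; // vpmulld/vphaddd/vpsllq
//     return lo_a * lo_b + cross;                         // vpmuludq + vpaddq
//   }
//
// On AVX512DQ targets vpmullq is emitted directly instead (vmul2L_reg above,
// vmul4L_reg and vmul4L_mem below).

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t!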
mul packed4L" %} 9768 ins_encode %{ 9769 int vector_len = 1; 9770 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9771 %} 9772 ins_pipe( pipe_slow ); 9773 %} 9774 9775 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9776 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 9777 match(Set dst (MulVL src1 src2)); 9778 format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} 9779 ins_encode %{ 9780 int vector_len = 2; 9781 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9782 %} 9783 ins_pipe( pipe_slow ); 9784 %} 9785 9786 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 9787 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 9788 match(Set dst (MulVL src (LoadVector mem))); 9789 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 9790 ins_encode %{ 9791 int vector_len = 2; 9792 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9793 %} 9794 ins_pipe( pipe_slow ); 9795 %} 9796 9797 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 9798 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9799 match(Set dst (MulVI src1 src2)); 9800 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 9801 ins_encode %{ 9802 int vector_len = 1; 9803 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9804 %} 9805 ins_pipe( pipe_slow ); 9806 %} 9807 9808 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 9809 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9810 match(Set dst (MulVI src (LoadVector mem))); 9811 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 9812 ins_encode %{ 9813 int vector_len = 1; 9814 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9815 %} 9816 ins_pipe( pipe_slow ); 9817 %} 9818 9819 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9820 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9821 match(Set dst (MulVI src1 src2)); 9822 format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %} 9823 ins_encode %{ 9824 int vector_len = 2; 9825 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9826 %} 9827 ins_pipe( pipe_slow ); 9828 %} 9829 9830 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 9831 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9832 match(Set dst (MulVI src (LoadVector mem))); 9833 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 9834 ins_encode %{ 9835 int vector_len = 2; 9836 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9837 %} 9838 ins_pipe( pipe_slow ); 9839 %} 9840 9841 // Floats vector mul 9842 instruct vmul2F(vecD dst, vecD src) %{ 9843 predicate(n->as_Vector()->length() == 2); 9844 match(Set dst (MulVF dst src)); 9845 format %{ "mulps $dst,$src\t! mul packed2F" %} 9846 ins_encode %{ 9847 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 9848 %} 9849 ins_pipe( pipe_slow ); 9850 %} 9851 9852 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 9853 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9854 match(Set dst (MulVF src1 src2)); 9855 format %{ "vmulps $dst,$src1,$src2\t! 
mul packed2F" %} 9856 ins_encode %{ 9857 int vector_len = 0; 9858 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9859 %} 9860 ins_pipe( pipe_slow ); 9861 %} 9862 9863 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 9864 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9865 match(Set dst (MulVF src (LoadVector mem))); 9866 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 9867 ins_encode %{ 9868 int vector_len = 0; 9869 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9870 %} 9871 ins_pipe( pipe_slow ); 9872 %} 9873 9874 instruct vmul4F(vecX dst, vecX src) %{ 9875 predicate(n->as_Vector()->length() == 4); 9876 match(Set dst (MulVF dst src)); 9877 format %{ "mulps $dst,$src\t! mul packed4F" %} 9878 ins_encode %{ 9879 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 9880 %} 9881 ins_pipe( pipe_slow ); 9882 %} 9883 9884 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 9885 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9886 match(Set dst (MulVF src1 src2)); 9887 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 9888 ins_encode %{ 9889 int vector_len = 0; 9890 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9891 %} 9892 ins_pipe( pipe_slow ); 9893 %} 9894 9895 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 9896 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9897 match(Set dst (MulVF src (LoadVector mem))); 9898 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 9899 ins_encode %{ 9900 int vector_len = 0; 9901 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9902 %} 9903 ins_pipe( pipe_slow ); 9904 %} 9905 9906 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 9907 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9908 match(Set dst (MulVF src1 src2)); 9909 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 9910 ins_encode %{ 9911 int vector_len = 1; 9912 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9913 %} 9914 ins_pipe( pipe_slow ); 9915 %} 9916 9917 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 9918 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 9919 match(Set dst (MulVF src (LoadVector mem))); 9920 format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %} 9921 ins_encode %{ 9922 int vector_len = 1; 9923 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9924 %} 9925 ins_pipe( pipe_slow ); 9926 %} 9927 9928 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 9929 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9930 match(Set dst (MulVF src1 src2)); 9931 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 9932 ins_encode %{ 9933 int vector_len = 2; 9934 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 9935 %} 9936 ins_pipe( pipe_slow ); 9937 %} 9938 9939 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 9940 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9941 match(Set dst (MulVF src (LoadVector mem))); 9942 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 9943 ins_encode %{ 9944 int vector_len = 2; 9945 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 9946 %} 9947 ins_pipe( pipe_slow ); 9948 %} 9949 9950 // Doubles vector mul 9951 instruct vmul2D(vecX dst, vecX src) %{ 9952 predicate(n->as_Vector()->length() == 2); 9953 match(Set dst (MulVD dst src)); 9954 format %{ "mulpd $dst,$src\t! 
mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
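// The CMoveV rules below turn a vector conditional move into two steps: a
// packed compare that leaves an all-ones/all-zeros mask in every lane, and a
// variable blend that picks src2 wherever a lane's mask (its sign bit) is
// set. Per-lane scalar sketch; cmovvf_lane is a hypothetical helper, not
// part of this file:
//
//   #include <stdint.h>
//
//   static inline float cmovvf_lane(float src1, float src2, int cond_holds) {
//     uint32_t mask = cond_holds ? 0xffffffffu : 0u;  // vcmpps lane result
//     return mask ? src2 : src1;                      // vblendvps selection
//   }

instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst !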
vcmovevf\n\t" 10033 %} 10034 ins_encode %{ 10035 int vector_len = 1; 10036 int cond = (Assembler::Condition)($copnd$$cmpcode); 10037 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 10038 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 10039 %} 10040 ins_pipe( pipe_slow ); 10041 %} 10042 10043 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 10044 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4); 10045 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 10046 effect(TEMP dst, USE src1, USE src2); 10047 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 10048 "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 10049 %} 10050 ins_encode %{ 10051 int vector_len = 1; 10052 int cond = (Assembler::Condition)($copnd$$cmpcode); 10053 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 10054 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 10055 %} 10056 ins_pipe( pipe_slow ); 10057 %} 10058 10059 // --------------------------------- DIV -------------------------------------- 10060 10061 // Floats vector div 10062 instruct vdiv2F(vecD dst, vecD src) %{ 10063 predicate(n->as_Vector()->length() == 2); 10064 match(Set dst (DivVF dst src)); 10065 format %{ "divps $dst,$src\t! div packed2F" %} 10066 ins_encode %{ 10067 __ divps($dst$$XMMRegister, $src$$XMMRegister); 10068 %} 10069 ins_pipe( pipe_slow ); 10070 %} 10071 10072 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 10073 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10074 match(Set dst (DivVF src1 src2)); 10075 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 10076 ins_encode %{ 10077 int vector_len = 0; 10078 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10079 %} 10080 ins_pipe( pipe_slow ); 10081 %} 10082 10083 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 10084 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10085 match(Set dst (DivVF src (LoadVector mem))); 10086 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 10087 ins_encode %{ 10088 int vector_len = 0; 10089 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10090 %} 10091 ins_pipe( pipe_slow ); 10092 %} 10093 10094 instruct vdiv4F(vecX dst, vecX src) %{ 10095 predicate(n->as_Vector()->length() == 4); 10096 match(Set dst (DivVF dst src)); 10097 format %{ "divps $dst,$src\t! div packed4F" %} 10098 ins_encode %{ 10099 __ divps($dst$$XMMRegister, $src$$XMMRegister); 10100 %} 10101 ins_pipe( pipe_slow ); 10102 %} 10103 10104 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 10105 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10106 match(Set dst (DivVF src1 src2)); 10107 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 10108 ins_encode %{ 10109 int vector_len = 0; 10110 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10111 %} 10112 ins_pipe( pipe_slow ); 10113 %} 10114 10115 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 10116 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10117 match(Set dst (DivVF src (LoadVector mem))); 10118 format %{ "vdivps $dst,$src,$mem\t! 
div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
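// The vector_len argument threaded through these encodings selects the SIMD
// register width, and the UseAVX level in each predicate gates which rules
// may match: vector_len 0 encodes 128-bit xmm, 1 encodes 256-bit ymm, and 2
// encodes 512-bit zmm, which is why every vecZ rule (such as the packed16F
// divides above) is guarded by UseAVX > 2 (AVX-512). A hypothetical helper
// making the width mapping explicit, not part of this file:
//
//   static inline int vector_len_bits(int vector_len) {
//     return 128 << vector_len;  // 0 -> 128, 1 -> 256, 2 -> 512
//   }

// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t!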
div packed4D" %} 10207 ins_encode %{ 10208 int vector_len = 1; 10209 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10210 %} 10211 ins_pipe( pipe_slow ); 10212 %} 10213 10214 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 10215 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10216 match(Set dst (DivVD src (LoadVector mem))); 10217 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 10218 ins_encode %{ 10219 int vector_len = 1; 10220 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10221 %} 10222 ins_pipe( pipe_slow ); 10223 %} 10224 10225 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 10226 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 10227 match(Set dst (DivVD src1 src2)); 10228 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 10229 ins_encode %{ 10230 int vector_len = 2; 10231 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10232 %} 10233 ins_pipe( pipe_slow ); 10234 %} 10235 10236 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 10237 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 10238 match(Set dst (DivVD src (LoadVector mem))); 10239 format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %} 10240 ins_encode %{ 10241 int vector_len = 2; 10242 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10243 %} 10244 ins_pipe( pipe_slow ); 10245 %} 10246 10247 // ------------------------------ Min --------------------------------------- 10248 // Byte vector Min 10249 instruct min8B_reg(vecD dst, vecD src1, vecD src2) %{ 10250 predicate(UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10251 match(Set dst (MinV src1 src2)); 10252 effect(TEMP dst); 10253 format %{ "movsd $dst,$src1\n\t" 10254 "pminsb $dst,$src2\t! " %} 10255 ins_encode %{ 10256 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10257 __ pminsb($dst$$XMMRegister, $src2$$XMMRegister); 10258 %} 10259 ins_pipe( pipe_slow ); 10260 %} 10261 10262 instruct min16B_reg(vecX dst, vecX src1, vecX src2) %{ 10263 predicate(UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10264 match(Set dst (MinV src1 src2)); 10265 effect(TEMP dst); 10266 format %{ "movdqu $dst,$src1\n\t" 10267 "pminsb $dst,$src2\t! " %} 10268 ins_encode %{ 10269 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10270 __ pminsb($dst$$XMMRegister, $src2$$XMMRegister); 10271 %} 10272 ins_pipe( pipe_slow ); 10273 %} 10274 10275 instruct min16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10276 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10277 match(Set dst (MinV src1 src2)); 10278 format %{ "vpminsb $dst,$src1,$src2\t! " %} 10279 ins_encode %{ 10280 int vector_len = 0; 10281 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10282 %} 10283 ins_pipe( pipe_slow ); 10284 %} 10285 10286 instruct min16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10287 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10288 match(Set dst (MinV src1 src2)); 10289 format %{ "vpminsb $dst,$src1,$src2\t! 
" %} 10290 ins_encode %{ 10291 int vector_len = 0; 10292 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10293 %} 10294 ins_pipe( pipe_slow ); 10295 %} 10296 10297 instruct min32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10298 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10299 match(Set dst (MinV src1 src2)); 10300 format %{ "vpminsb $dst,$src1,$src2\t! " %} 10301 ins_encode %{ 10302 int vector_len = 1; 10303 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10304 %} 10305 ins_pipe( pipe_slow ); 10306 %} 10307 10308 instruct min32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10309 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10310 match(Set dst (MinV src1 src2)); 10311 format %{ "vpminsb $dst,$src1,$src2\t! " %} 10312 ins_encode %{ 10313 int vector_len = 1; 10314 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10315 %} 10316 ins_pipe( pipe_slow ); 10317 %} 10318 10319 instruct min64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10320 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10321 match(Set dst (MinV src1 src2)); 10322 format %{ "vpminsb $dst,$src1,$src2\t! " %} 10323 ins_encode %{ 10324 int vector_len = 2; 10325 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10326 %} 10327 ins_pipe( pipe_slow ); 10328 %} 10329 10330 //Short vector Min 10331 instruct min4S_reg(vecD dst, vecD src1, vecD src2) %{ 10332 predicate(UseSSE > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10333 match(Set dst (MinV src1 src2)); 10334 effect(TEMP dst); 10335 format %{ "movsd $dst,$src1\n\t" 10336 "pminsw $dst,$src2\t! " %} 10337 ins_encode %{ 10338 __ movsd($src1$$XMMRegister, $src2$$XMMRegister); 10339 __ pminsw($dst$$XMMRegister, $src1$$XMMRegister); 10340 %} 10341 ins_pipe( pipe_slow ); 10342 %} 10343 10344 instruct min8S_reg(vecX dst, vecX src1, vecX src2) %{ 10345 predicate(UseSSE > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10346 match(Set dst (MinV src1 src2)); 10347 effect(TEMP dst); 10348 format %{ "movsd $dst,$src1\n\t" 10349 "pminsw $dst,$src2\t! " %} 10350 ins_encode %{ 10351 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10352 __ pminsw($dst$$XMMRegister, $src2$$XMMRegister); 10353 %} 10354 ins_pipe( pipe_slow ); 10355 %} 10356 10357 instruct min8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10358 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10359 match(Set dst (MinV src1 src2)); 10360 format %{ "vpminsw $dst,$src1,$src2\t! " %} 10361 ins_encode %{ 10362 int vector_len = 0; 10363 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10364 %} 10365 ins_pipe( pipe_slow ); 10366 %} 10367 10368 instruct min8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10369 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10370 match(Set dst (MinV src1 src2)); 10371 format %{ "vpminsw $dst,$src1,$src2\t! 
" %} 10372 ins_encode %{ 10373 int vector_len = 0; 10374 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10375 %} 10376 ins_pipe( pipe_slow ); 10377 %} 10378 10379 instruct min16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10380 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10381 match(Set dst (MinV src1 src2)); 10382 format %{ "vpminsw $dst,$src1,$src2\t! " %} 10383 ins_encode %{ 10384 int vector_len = 1; 10385 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10386 %} 10387 ins_pipe( pipe_slow ); 10388 %} 10389 10390 instruct min16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10391 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10392 match(Set dst (MinV src1 src2)); 10393 format %{ "vpminsw $dst,$src1,$src2\t! " %} 10394 ins_encode %{ 10395 int vector_len = 1; 10396 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10397 %} 10398 ins_pipe( pipe_slow ); 10399 %} 10400 10401 instruct min32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10402 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10403 match(Set dst (MinV src1 src2)); 10404 format %{ "vpminsw $dst,$src1,$src2\t! " %} 10405 ins_encode %{ 10406 int vector_len = 2; 10407 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10408 %} 10409 ins_pipe( pipe_slow ); 10410 %} 10411 10412 // Int vector Min 10413 instruct min2I_reg(vecD dst, vecD src1, vecD src2) %{ 10414 predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10415 match(Set dst (MinV src1 src2)); 10416 effect(TEMP dst); 10417 format %{ "movsd $dst,$src1\n\t" 10418 "pminsd $dst,$src2\t! " %} 10419 ins_encode %{ 10420 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10421 __ pminsd($dst$$XMMRegister, $src2$$XMMRegister); 10422 %} 10423 ins_pipe( pipe_slow ); 10424 %} 10425 10426 instruct min4I_reg(vecX dst, vecX src1, vecX src2) %{ 10427 predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10428 match(Set dst (MinV src1 src2)); 10429 effect(TEMP dst); 10430 format %{ "movdqu $dst,$src1\n\t" 10431 "pminsd $dst,$src2\t! " %} 10432 ins_encode %{ 10433 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10434 __ pminsd($dst$$XMMRegister, $src2$$XMMRegister); 10435 %} 10436 ins_pipe( pipe_slow ); 10437 %} 10438 10439 instruct min4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10440 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10441 match(Set dst (MinV src1 src2)); 10442 format %{ "vpminsd $dst,$src1,$src2\t! " %} 10443 ins_encode %{ 10444 int vector_len = 0; 10445 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10446 %} 10447 ins_pipe( pipe_slow ); 10448 %} 10449 10450 instruct min4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10451 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10452 match(Set dst (MinV src1 src2)); 10453 format %{ "vpminsd $dst,$src1,$src2\t! 
" %} 10454 ins_encode %{ 10455 int vector_len = 0; 10456 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10457 %} 10458 ins_pipe( pipe_slow ); 10459 %} 10460 10461 instruct min8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10462 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10463 match(Set dst (MinV src1 src2)); 10464 format %{ "vpminsd $dst,$src1,$src2\t! " %} 10465 ins_encode %{ 10466 int vector_len = 1; 10467 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10468 %} 10469 ins_pipe( pipe_slow ); 10470 %} 10471 10472 instruct min8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10473 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10474 match(Set dst (MinV src1 src2)); 10475 format %{ "vpminsd $dst,$src1,$src2\t! " %} 10476 ins_encode %{ 10477 int vector_len = 1; 10478 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10479 %} 10480 ins_pipe( pipe_slow ); 10481 %} 10482 10483 instruct min16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10484 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10485 match(Set dst (MinV src1 src2)); 10486 format %{ "vpminsd $dst,$src1,$src2\t! " %} 10487 ins_encode %{ 10488 int vector_len = 2; 10489 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10490 %} 10491 ins_pipe( pipe_slow ); 10492 %} 10493 10494 // Long vector Min 10495 instruct minL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ 10496 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10497 match(Set dst (MinV src1 src2)); 10498 effect(TEMP dst, TEMP tmp); 10499 format %{ "movsd $tmp,$src1\n\t" 10500 "movsd $dst,$src1\n\t" 10501 "pcmpgtq $tmp,$src2\n\t" 10502 "blendvpd $dst,$src2\t! " %} 10503 ins_encode %{ 10504 __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); 10505 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10506 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 10507 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 10508 %} 10509 ins_pipe( pipe_slow ); 10510 %} 10511 10512 instruct min2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ 10513 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10514 match(Set dst (MinV src1 src2)); 10515 effect(TEMP dst, TEMP tmp); 10516 format %{ "movdqu $tmp,$src1\n\t" 10517 "movdqu $dst,$src1\n\t" 10518 "pcmpgtq $tmp,$src2\n\t" 10519 "blendvpd $dst,$src2\t! " %} 10520 ins_encode %{ 10521 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 10522 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10523 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 10524 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 10525 %} 10526 ins_pipe( pipe_slow ); 10527 %} 10528 10529 instruct min2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10530 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10531 match(Set dst (MinV src1 src2)); 10532 effect(TEMP dst); 10533 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 10534 "vblendvpd $dst,$src1,$src2,$dst\t! 
" %} 10535 ins_encode %{ 10536 int vector_len = 0; 10537 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10538 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 10539 %} 10540 ins_pipe( pipe_slow ); 10541 %} 10542 10543 instruct min4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10544 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10545 match(Set dst (MinV src1 src2)); 10546 effect(TEMP dst); 10547 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 10548 "vblendvpd $dst,$src1,$src2,$dst\t! " %} 10549 ins_encode %{ 10550 int vector_len = 1; 10551 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10552 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 10553 %} 10554 ins_pipe( pipe_slow ); 10555 %} 10556 10557 instruct min2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10558 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10559 match(Set dst (MinV src1 src2)); 10560 format %{ "vpminsq $dst,$src1,src2\t! " %} 10561 ins_encode %{ 10562 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 10563 %} 10564 ins_pipe( pipe_slow ); 10565 %} 10566 10567 instruct min4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10568 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10569 match(Set dst (MinV src1 src2)); 10570 format %{ "vpminsq $dst,$src1,src2\t! " %} 10571 ins_encode %{ 10572 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 10573 %} 10574 ins_pipe( pipe_slow ); 10575 %} 10576 10577 instruct min8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10578 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10579 match(Set dst (MinV src1 src2)); 10580 format %{ "vpminsq $dst,$src1,src2\t! " %} 10581 ins_encode %{ 10582 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 10583 %} 10584 ins_pipe( pipe_slow ); 10585 %} 10586 10587 // Float vector Min 10588 instruct min2F_reg(vecD dst, vecD src1, vecD src2) %{ 10589 predicate(UseSSE > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10590 match(Set dst (MinV src1 src2)); 10591 effect(TEMP dst); 10592 format %{ "movsd $dst,$src1\n\t" 10593 "minps $dst,$src2\t! " %} 10594 ins_encode %{ 10595 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10596 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 10597 %} 10598 ins_pipe( pipe_slow ); 10599 %} 10600 10601 instruct min4F_reg(vecX dst, vecX src1, vecX src2) %{ 10602 predicate(UseSSE > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10603 match(Set dst (MinV src1 src2)); 10604 effect(TEMP dst); 10605 format %{ "movdqu $dst,$src1\n\t" 10606 "minps $dst,$src2\t! 
" %} 10607 ins_encode %{ 10608 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10609 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 10610 %} 10611 ins_pipe( pipe_slow ); 10612 %} 10613 10614 instruct min4F_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10615 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10616 match(Set dst (MinV src1 src2)); 10617 format %{ "vminps $dst,$src1,$src2\t! " %} 10618 ins_encode %{ 10619 int vector_len = 0; 10620 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10621 %} 10622 ins_pipe( pipe_slow ); 10623 %} 10624 10625 instruct min4F_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10626 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10627 match(Set dst (MinV src1 src2)); 10628 format %{ "vminps $dst,$src1,$src2\t! " %} 10629 ins_encode %{ 10630 int vector_len = 0; 10631 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10632 %} 10633 ins_pipe( pipe_slow ); 10634 %} 10635 10636 instruct min8F_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10637 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10638 match(Set dst (MinV src1 src2)); 10639 format %{ "vminps $dst,$src1,$src2\t! " %} 10640 ins_encode %{ 10641 int vector_len = 1; 10642 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10643 %} 10644 ins_pipe( pipe_slow ); 10645 %} 10646 10647 instruct min8F_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10648 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10649 match(Set dst (MinV src1 src2)); 10650 format %{ "vminps $dst,$src1,$src2\t! " %} 10651 ins_encode %{ 10652 int vector_len = 1; 10653 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10654 %} 10655 ins_pipe( pipe_slow ); 10656 %} 10657 10658 instruct min16F_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10659 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 10660 match(Set dst (MinV src1 src2)); 10661 format %{ "vminps $dst,$src1,$src2\t! " %} 10662 ins_encode %{ 10663 int vector_len = 2; 10664 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10665 %} 10666 ins_pipe( pipe_slow ); 10667 %} 10668 10669 // Double vector Min 10670 instruct minD_reg(vecD dst, vecD src1, vecD src2) %{ 10671 predicate(UseSSE > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10672 match(Set dst (MinV src1 src2)); 10673 effect(TEMP dst); 10674 format %{ "movsd $dst,$src1\n\t" 10675 "minpd $dst,$src2\t! " %} 10676 ins_encode %{ 10677 int vector_len = 0; 10678 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10679 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 10680 %} 10681 ins_pipe( pipe_slow ); 10682 %} 10683 10684 instruct min2D_reg(vecX dst, vecX src1, vecX src2) %{ 10685 predicate(UseSSE > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10686 match(Set dst (MinV src1 src2)); 10687 effect(TEMP dst); 10688 format %{ "movdqu $dst,$src1\n\t" 10689 "minpd $dst,$src2\t! 
" %} 10690 ins_encode %{ 10691 int vector_len = 0; 10692 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10693 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 10694 %} 10695 ins_pipe( pipe_slow ); 10696 %} 10697 10698 instruct min2D_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10699 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10700 match(Set dst (MinV src1 src2)); 10701 format %{ "vminpd $dst,$src1,$src2\t! " %} 10702 ins_encode %{ 10703 int vector_len = 0; 10704 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10705 %} 10706 ins_pipe( pipe_slow ); 10707 %} 10708 10709 instruct min2D_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10710 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10711 match(Set dst (MinV src1 src2)); 10712 format %{ "vminpd $dst,$src1,$src2\t! " %} 10713 ins_encode %{ 10714 int vector_len = 0; 10715 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10716 %} 10717 ins_pipe( pipe_slow ); 10718 %} 10719 10720 instruct min4D_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10721 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10722 match(Set dst (MinV src1 src2)); 10723 format %{ "vminpd $dst,$src1,$src2\t! " %} 10724 ins_encode %{ 10725 int vector_len = 1; 10726 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10727 %} 10728 ins_pipe( pipe_slow ); 10729 %} 10730 10731 instruct min4D_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10732 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10733 match(Set dst (MinV src1 src2)); 10734 format %{ "vminpd $dst,$src1,$src2\t! " %} 10735 ins_encode %{ 10736 int vector_len = 1; 10737 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10738 %} 10739 ins_pipe( pipe_slow ); 10740 %} 10741 10742 instruct min8D_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10743 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 10744 match(Set dst (MinV src1 src2)); 10745 format %{ "vminpd $dst,$src1,$src2\t! " %} 10746 ins_encode %{ 10747 int vector_len = 2; 10748 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10749 %} 10750 ins_pipe( pipe_slow ); 10751 %} 10752 10753 // ------------------------------ Max --------------------------------------- 10754 // Byte vector Max 10755 instruct max8B_reg(vecD dst, vecD src1, vecD src2) %{ 10756 predicate(UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10757 match(Set dst (MaxV src1 src2)); 10758 effect(TEMP dst); 10759 format %{ "movsd $dst,$src1\n\t" 10760 "pmaxsb $dst,$src2\t! 
" %} 10761 ins_encode %{ 10762 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10763 __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); 10764 %} 10765 ins_pipe( pipe_slow ); 10766 %} 10767 10768 instruct max16B_reg(vecX dst, vecX src1, vecX src2) %{ 10769 predicate(UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10770 match(Set dst (MaxV src1 src2)); 10771 effect(TEMP dst); 10772 format %{ "movdqu $dst,$src1\n\t" 10773 "pmaxsb $dst,$src2\t! " %} 10774 ins_encode %{ 10775 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10776 __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); 10777 %} 10778 ins_pipe( pipe_slow ); 10779 %} 10780 10781 instruct max16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10782 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10783 match(Set dst (MaxV src1 src2)); 10784 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10785 ins_encode %{ 10786 int vector_len = 0; 10787 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10788 %} 10789 ins_pipe( pipe_slow ); 10790 %} 10791 10792 instruct max16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10793 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10794 match(Set dst (MaxV src1 src2)); 10795 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10796 ins_encode %{ 10797 int vector_len = 0; 10798 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10799 %} 10800 ins_pipe( pipe_slow ); 10801 %} 10802 10803 instruct max32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10804 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10805 match(Set dst (MaxV src1 src2)); 10806 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10807 ins_encode %{ 10808 int vector_len = 1; 10809 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10810 %} 10811 ins_pipe( pipe_slow ); 10812 %} 10813 10814 instruct max32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10815 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10816 match(Set dst (MaxV src1 src2)); 10817 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10818 ins_encode %{ 10819 int vector_len = 1; 10820 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10821 %} 10822 ins_pipe( pipe_slow ); 10823 %} 10824 10825 instruct max64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10826 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10827 match(Set dst (MaxV src1 src2)); 10828 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 10829 ins_encode %{ 10830 int vector_len = 2; 10831 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10832 %} 10833 ins_pipe( pipe_slow ); 10834 %} 10835 10836 //Short vector Max 10837 instruct max4S_reg(vecD dst, vecD src1, vecD src2) %{ 10838 predicate(UseSSE > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10839 match(Set dst (MaxV src1 src2)); 10840 effect(TEMP dst); 10841 format %{ "movsd $dst,$src1\n\t" 10842 "pmaxsw $dst,$src2\t! 
" %} 10843 ins_encode %{ 10844 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 10845 __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); 10846 %} 10847 ins_pipe( pipe_slow ); 10848 %} 10849 10850 instruct max8S_reg(vecX dst, vecX src1, vecX src2) %{ 10851 predicate(UseSSE > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10852 match(Set dst (MaxV src1 src2)); 10853 effect(TEMP dst); 10854 format %{ "movdqu $dst,$src1\n\t" 10855 "pmaxsw $dst,$src2\t! " %} 10856 ins_encode %{ 10857 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10858 __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); 10859 %} 10860 ins_pipe( pipe_slow ); 10861 %} 10862 10863 instruct max8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10864 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10865 match(Set dst (MaxV src1 src2)); 10866 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10867 ins_encode %{ 10868 int vector_len = 0; 10869 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10870 %} 10871 ins_pipe( pipe_slow ); 10872 %} 10873 10874 instruct max8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10875 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10876 match(Set dst (MaxV src1 src2)); 10877 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10878 ins_encode %{ 10879 int vector_len = 0; 10880 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10881 %} 10882 ins_pipe( pipe_slow ); 10883 %} 10884 10885 instruct max16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10886 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10887 match(Set dst (MaxV src1 src2)); 10888 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10889 ins_encode %{ 10890 int vector_len = 1; 10891 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10892 %} 10893 ins_pipe( pipe_slow ); 10894 %} 10895 10896 instruct max16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10897 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10898 match(Set dst (MaxV src1 src2)); 10899 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10900 ins_encode %{ 10901 int vector_len = 1; 10902 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10903 %} 10904 ins_pipe( pipe_slow ); 10905 %} 10906 10907 instruct max32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10908 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10909 match(Set dst (MaxV src1 src2)); 10910 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 10911 ins_encode %{ 10912 int vector_len = 2; 10913 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10914 %} 10915 ins_pipe( pipe_slow ); 10916 %} 10917 10918 // Int vector Max 10919 instruct max2I_reg(vecD dst, vecD src1, vecD src2) %{ 10920 predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10921 match(Set dst (MaxV src1 src2)); 10922 effect(TEMP dst); 10923 format %{ "movdqu $dst,$src1\n\t" 10924 "pmaxsd $dst,$src2\t! 
" %} 10925 ins_encode %{ 10926 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10927 __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); 10928 %} 10929 ins_pipe( pipe_slow ); 10930 %} 10931 10932 instruct max4I_reg(vecX dst, vecX src1, vecX src2) %{ 10933 predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10934 match(Set dst (MaxV src1 src2)); 10935 effect(TEMP dst); 10936 format %{ "movdqu $dst,$src1\n\t" 10937 "pmaxsd $dst,$src2\t! " %} 10938 ins_encode %{ 10939 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 10940 __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); 10941 %} 10942 ins_pipe( pipe_slow ); 10943 %} 10944 10945 instruct max4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ 10946 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10947 match(Set dst (MaxV src1 src2)); 10948 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 10949 ins_encode %{ 10950 int vector_len = 0; 10951 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10952 %} 10953 ins_pipe( pipe_slow ); 10954 %} 10955 10956 instruct max4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ 10957 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10958 match(Set dst (MaxV src1 src2)); 10959 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 10960 ins_encode %{ 10961 int vector_len = 0; 10962 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10963 %} 10964 ins_pipe( pipe_slow ); 10965 %} 10966 10967 instruct max8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ 10968 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10969 match(Set dst (MaxV src1 src2)); 10970 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 10971 ins_encode %{ 10972 int vector_len = 1; 10973 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10974 %} 10975 ins_pipe( pipe_slow ); 10976 %} 10977 10978 instruct max8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ 10979 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10980 match(Set dst (MaxV src1 src2)); 10981 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 10982 ins_encode %{ 10983 int vector_len = 1; 10984 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10985 %} 10986 ins_pipe( pipe_slow ); 10987 %} 10988 10989 instruct max16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 10990 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 10991 match(Set dst (MaxV src1 src2)); 10992 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 10993 ins_encode %{ 10994 int vector_len = 2; 10995 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10996 %} 10997 ins_pipe( pipe_slow ); 10998 %} 10999 11000 // Long Vector Max 11001 instruct maxL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ 11002 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11003 match(Set dst (MaxV src1 src2)); 11004 effect(TEMP dst, TEMP tmp); 11005 format %{ "movsd $tmp,$src1\n\t" 11006 "movsd $dst,$src1\n\t" 11007 "pcmpgtq $tmp,$src2\n\t" 11008 "blendvpd $dst,$src2\t! 
" %} 11009 ins_encode %{ 11010 __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); 11011 __ movsd($dst$$XMMRegister, $src2$$XMMRegister); 11012 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 11013 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister); 11014 %} 11015 ins_pipe( pipe_slow ); 11016 %} 11017 11018 instruct max2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ 11019 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11020 match(Set dst (MaxV src1 src2)); 11021 effect(TEMP dst, TEMP tmp); 11022 format %{ "movdqu $tmp,$src1\n\t" 11023 "movdqu $dst,$src2\n\t" 11024 "pcmpgtq $tmp,$src2\n\t" 11025 "blendvpd $dst,$src1\t! " %} 11026 ins_encode %{ 11027 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 11028 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 11029 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 11030 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 11031 %} 11032 ins_pipe( pipe_slow ); 11033 %} 11034 11035 instruct max2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11036 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11037 match(Set dst (MaxV src1 src2)); 11038 effect(TEMP dst); 11039 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 11040 "vblendvpd $dst,$src2,$src1,$dst\t! " %} 11041 ins_encode %{ 11042 int vector_len = 0; 11043 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11044 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 11045 %} 11046 ins_pipe( pipe_slow ); 11047 %} 11048 11049 instruct max2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11050 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11051 match(Set dst (MaxV src1 src2)); 11052 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 11053 ins_encode %{ 11054 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 11055 %} 11056 ins_pipe( pipe_slow ); 11057 %} 11058 11059 instruct max4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11060 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11061 match(Set dst (MaxV src1 src2)); 11062 effect(TEMP dst); 11063 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 11064 "vblendvpd $dst,$src2,$src1,$dst\t! " %} 11065 ins_encode %{ 11066 int vector_len = 1; 11067 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11068 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 11069 %} 11070 ins_pipe( pipe_slow ); 11071 %} 11072 11073 instruct max4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ 11074 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11075 match(Set dst (MaxV src1 src2)); 11076 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 11077 ins_encode %{ 11078 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 11079 %} 11080 ins_pipe( pipe_slow ); 11081 %} 11082 11083 instruct max8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 11084 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11085 match(Set dst (MaxV src1 src2)); 11086 format %{ "vpmaxsq $dst,$src1,src2\t! 
" %} 11087 ins_encode %{ 11088 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2); 11089 %} 11090 ins_pipe( pipe_slow ); 11091 %} 11092 11093 // Float Vector Max 11094 instruct max2F_reg(vecD dst, vecD src1, vecD src2) %{ 11095 predicate(UseSSE > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11096 match(Set dst (MaxV src1 src2)); 11097 effect(TEMP dst); 11098 format %{ "movsd $dst,$src1\n\t" 11099 "maxps $dst,$src2\t! " %} 11100 ins_encode %{ 11101 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 11102 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 11103 %} 11104 ins_pipe( pipe_slow ); 11105 %} 11106 11107 instruct max4F_reg(vecX dst, vecX src1, vecX src2) %{ 11108 predicate(UseSSE > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11109 match(Set dst (MaxV src1 src2)); 11110 effect(TEMP dst); 11111 format %{ "movdqu $dst,$src1\n\t" 11112 "maxps $dst,$src2\t! " %} 11113 ins_encode %{ 11114 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 11115 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 11116 %} 11117 ins_pipe( pipe_slow ); 11118 %} 11119 11120 instruct max4F_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11121 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11122 match(Set dst (MaxV src1 src2)); 11123 format %{ "vmaxps $dst,$src1,$src2\t! " %} 11124 ins_encode %{ 11125 int vector_len = 0; 11126 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11127 %} 11128 ins_pipe( pipe_slow ); 11129 %} 11130 11131 instruct max4F_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11132 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11133 match(Set dst (MaxV src1 src2)); 11134 format %{ "vmaxps $dst,$src1,$src2\t! " %} 11135 ins_encode %{ 11136 int vector_len = 0; 11137 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11138 %} 11139 ins_pipe( pipe_slow ); 11140 %} 11141 11142 instruct max8F_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11143 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11144 match(Set dst (MaxV src1 src2)); 11145 format %{ "vmaxps $dst,$src1,$src2\t! " %} 11146 ins_encode %{ 11147 int vector_len = 1; 11148 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11149 %} 11150 ins_pipe( pipe_slow ); 11151 %} 11152 11153 instruct max8F_reg_evex(vecY dst, vecY src1, vecY src2) %{ 11154 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11155 match(Set dst (MaxV src1 src2)); 11156 format %{ "vmaxps $dst,$src1,$src2\t! " %} 11157 ins_encode %{ 11158 int vector_len = 1; 11159 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11160 %} 11161 ins_pipe( pipe_slow ); 11162 %} 11163 11164 instruct max16F_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 11165 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 11166 match(Set dst (MaxV src1 src2)); 11167 format %{ "vmaxps $dst,$src1,$src2\t! 
" %} 11168 ins_encode %{ 11169 int vector_len = 2; 11170 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11171 %} 11172 ins_pipe( pipe_slow ); 11173 %} 11174 11175 // Double Vector Max 11176 instruct maxD_reg(vecD dst, vecD src1, vecD src2) %{ 11177 predicate(UseSSE > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11178 match(Set dst (MaxV src1 src2)); 11179 effect(TEMP dst); 11180 format %{ "movsd $dst,$src1\n\t" 11181 "maxpd $dst,$src2\t! " %} 11182 ins_encode %{ 11183 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 11184 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 11185 %} 11186 ins_pipe( pipe_slow ); 11187 %} 11188 11189 instruct max2D_reg(vecX dst, vecX src1, vecX src2) %{ 11190 predicate(UseSSE > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11191 match(Set dst (MaxV src1 src2)); 11192 effect(TEMP dst); 11193 format %{ "movdqu $dst,$src1\n\t" 11194 "maxpd $dst,$src2\t! " %} 11195 ins_encode %{ 11196 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 11197 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 11198 %} 11199 ins_pipe( pipe_slow ); 11200 %} 11201 11202 instruct max2D_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11203 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11204 match(Set dst (MaxV src1 src2)); 11205 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 11206 ins_encode %{ 11207 int vector_len = 0; 11208 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11209 %} 11210 ins_pipe( pipe_slow ); 11211 %} 11212 11213 instruct max2D_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11214 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11215 match(Set dst (MaxV src1 src2)); 11216 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 11217 ins_encode %{ 11218 int vector_len = 0; 11219 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11220 %} 11221 ins_pipe( pipe_slow ); 11222 %} 11223 11224 instruct max4D_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11225 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11226 match(Set dst (MaxV src1 src2)); 11227 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 11228 ins_encode %{ 11229 int vector_len = 1; 11230 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11231 %} 11232 ins_pipe( pipe_slow ); 11233 %} 11234 11235 instruct max4D_reg_evex(vecY dst, vecY src1, vecY src2) %{ 11236 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11237 match(Set dst (MaxV src1 src2)); 11238 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 11239 ins_encode %{ 11240 int vector_len = 1; 11241 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11242 %} 11243 ins_pipe( pipe_slow ); 11244 %} 11245 11246 instruct max8D_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 11247 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 11248 match(Set dst (MaxV src1 src2)); 11249 format %{ "vmaxpd $dst,$src1,$src2\t! 
" %} 11250 ins_encode %{ 11251 int vector_len = 2; 11252 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11253 %} 11254 ins_pipe( pipe_slow ); 11255 %} 11256 11257 // ------------------------------ Shift --------------------------------------- 11258 11259 // Left and right shift count vectors are the same on x86 11260 // (only lowest bits of xmm reg are used for count). 11261 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 11262 match(Set dst (LShiftCntV cnt)); 11263 match(Set dst (RShiftCntV cnt)); 11264 format %{ "movd $dst,$cnt\t! load shift count" %} 11265 ins_encode %{ 11266 __ movdl($dst$$XMMRegister, $cnt$$Register); 11267 %} 11268 ins_pipe( pipe_slow ); 11269 %} 11270 11271 // --------------------------------- Sqrt -------------------------------------- 11272 11273 // Floating point vector sqrt 11274 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 11275 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11276 match(Set dst (SqrtVD src)); 11277 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 11278 ins_encode %{ 11279 int vector_len = 0; 11280 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11281 %} 11282 ins_pipe( pipe_slow ); 11283 %} 11284 11285 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 11286 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11287 match(Set dst (SqrtVD (LoadVector mem))); 11288 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 11289 ins_encode %{ 11290 int vector_len = 0; 11291 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 11292 %} 11293 ins_pipe( pipe_slow ); 11294 %} 11295 11296 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 11297 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11298 match(Set dst (SqrtVD src)); 11299 format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %} 11300 ins_encode %{ 11301 int vector_len = 1; 11302 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11303 %} 11304 ins_pipe( pipe_slow ); 11305 %} 11306 11307 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 11308 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11309 match(Set dst (SqrtVD (LoadVector mem))); 11310 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 11311 ins_encode %{ 11312 int vector_len = 1; 11313 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 11314 %} 11315 ins_pipe( pipe_slow ); 11316 %} 11317 11318 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 11319 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 11320 match(Set dst (SqrtVD src)); 11321 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 11322 ins_encode %{ 11323 int vector_len = 2; 11324 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 11325 %} 11326 ins_pipe( pipe_slow ); 11327 %} 11328 11329 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 11330 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 11331 match(Set dst (SqrtVD (LoadVector mem))); 11332 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 11333 ins_encode %{ 11334 int vector_len = 2; 11335 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 11336 %} 11337 ins_pipe( pipe_slow ); 11338 %} 11339 11340 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 11341 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11342 match(Set dst (SqrtVF src)); 11343 format %{ "vsqrtps $dst,$src\t! 
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
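// Each short-shift size below comes in several flavors selected by CPU
// predicate: a destructive SSE form (UseAVX == 0), a three-operand AVX
// form (supports_avxonly), an AVX-512 form for chips with the BW extension
// (supports_avx512bw), and an "_evex_special" read-modify fallback for
// AVX-512 chips that lack BW (supports_avx512nobw).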
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    // Read-modify form: dst is both source and destination; src is only
    // an uninitialized TEMP and must not be used as the shift input.
    __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$dst,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$dst,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $dst$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts short values into ints with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
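// An illustrative Java fragment (hypothetical names) of the case described
// above: scalar semantics promote the short to int first, so a packed 16-bit
// psrlw would compute a different result for negative values:
//
//   static void urshift(short[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = (short)(a[i] >>> s);  // for a[i] == -1, s == 1: int path gives
//                                    // 0x7FFFFFFF, truncated back to -1,
//                                    // while psrlw on 0xFFFF would give 0x7FFF
//     }
//   }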
instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
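// For ints the Java operand is already 32 bits wide, so scalar >>> agrees
// with the packed shift lane-for-lane and no short-style caveat applies.
// A sketch (hypothetical names) of a loop that vectorizes to psrld/vpsrld:
//
//   static void urshiftAll(int[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = a[i] >>> s;
//     }
//   }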
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
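// Unlike the logical case above, arithmetic >> on shorts survives the int
// promotion: sign extension commutes with an arithmetic right shift, so
// psraw matches the scalar result exactly. An illustrative loop (hypothetical
// names):
//
//   static void sshiftAll(short[] a, int s) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = (short)(a[i] >> s);   // vectorizes to psraw/vpsraw
//     }
//   }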
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// There are no longs vector arithmetic right shift instructions.

// ------------------- Variable Bit Shift Left Logical -----------------------------
// Integer variable left shift
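// The n->in(2)->Opcode() != Op_LShiftCntV (and, below, != Op_RShiftCntV)
// tests assume the shift count is a genuine per-lane vector:
// LShiftCntV/RShiftCntV mark a broadcast scalar count, which the fixed-count
// rules above already handle. A sketch (hypothetical names) of the per-lane
// case that needs vpsllvd:
//
//   static void shiftEach(int[] a, int[] b) {
//     for (int i = 0; i < a.length; i++) {
//       a[i] = a[i] << b[i];   // lane i shifted by b[i]
//     }
//   }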
instruct vsllv2I(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv4I_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv8I_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv8I_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVI src shift));
  format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable left shift
instruct vsllv1L_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv2L_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv4L_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv4L_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsllv8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- Variable Bit Shift Right Logical -----------------------------
// Integer variable right shift
instruct vsrlv2I_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrlv4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv4I_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv8I_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv8I_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable right shift
instruct vsrlv1L_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrlv2L_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv4L_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv4L_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrlv8L_reg(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- Variable Bit Shift Right Arithmetic -----------------------------
// Integer variable right shift
instruct vsrav2I_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav4I_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav4I_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsrav8I_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav8I_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable arithmetic right shift
instruct vsrav1L_reg(vecD dst, vecD src, vecD shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav2L_reg(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav2L_reg_evex(vecX dst, vecX src, vecX shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav4L_reg(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
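// Note: most of the variable shifts in this section come in _reg/_reg_evex
// pairs (for example vsrav4L_reg above and vsrav4L_reg_evex below).
// Presumably the UseAVX predicates let the matcher pick the EVEX encoding on
// AVX-512 targets while keeping a VEX form available for older hardware.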
instruct vsrav4L_reg_evex(vecY dst, vecY src, vecY shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrav8L_reg(vecZ dst, vecZ src, vecZ shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
  match(Set dst (RShiftVL src shift));
  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (4 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (8 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vand16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors (16 bytes)" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
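// Note: the AND, OR and XOR sections each follow the same three-flavor
// pattern per vector width: a two-operand SSE form that reuses dst
// (pand/por/pxor), a three-operand AVX register form (vpand/vpor/vpxor),
// and an AVX form that folds the second operand from memory (LoadVector).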
instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcvt2Fto2D_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (ConvertVF2VD src));
  format %{ "vcvtps2pd $dst,$src\t! convert 2F to 2D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Fto4D_reg(vecY dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (ConvertVF2VD src));
  format %{ "vcvtps2pd $dst,$src\t! convert 4F to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Fto4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (ConvertVF2VD src));
  format %{ "vcvtps2pd $dst,$src\t! convert 8F to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Fto8D_reg(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (ConvertVF2VD src));
  format %{ "evcvtps2pd $dst,$src\t! convert 8F to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
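// Note: up to 256 bits, vcmpps/vcmppd write the all-ones/all-zeros lane mask
// directly into $dst. The 512-bit compares below cannot do that: EVEX
// comparisons set a k-mask register instead, so the vector result is
// materialized by a zero-masked load of an all-bits-set constant
// (evmovdqul ... ktmp{z}) gated by that k-mask.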
instruct vcmpeq16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpeqps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmplt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpltps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmpgt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgtps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmpge16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgeps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmple16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpleps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed2F" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed4F" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed8F" %}
  ins_encode %{
    int vector_len = 1;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
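    // For example, Float.NaN != 1.0f is true while Float.NaN < 1.0f is
    // false, which is why only the != compares use an unordered predicate.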
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpneps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed16F" %}
  ins_encode %{
    int vector_len = 2;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmpeq8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpeqpd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmplt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpltpd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmpgt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgtpd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vcmpge8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgepd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmplepd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed1D" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed2D" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed4D" %}
  ins_encode %{
    int vector_len = 1;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpnepd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed8D" %}
  ins_encode %{
    int vector_len = 2;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
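
// Packed integer compares. Below AVX-512, x86 only provides "equal"
// (vpcmpeq*) and signed "greater-than" (vpcmpgt*), so the remaining
// predicates in the rules that follow are synthesized from those two:
//   lt: gt with the operands swapped
//   ge: NOT(gt with the operands swapped)
//   le: NOT(gt)
//   ne: NOT(eq)
// The NOT is a vpxor against the all-ones constant at
// vector_all_bits_set(), which is why those rules take a scratch register.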

instruct vcmpeq2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\t! cmpeq packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\t! cmpeq packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\t! cmpeq packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnled k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
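
// The cmpge rules below show the complement trick concretely; the emitted
// sequence is, schematically:
//   vpcmpgtd dst, src2, src1       // dst[i] = (src2[i] > src1[i]) ? ~0 : 0
//   vpxor    dst, dst, [all_ones]  // dst[i] = ~dst[i] == (src1[i] >= src2[i])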

instruct vcmpge2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpled k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqd $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqd $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqd $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
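
// Packed byte compares. These follow the same predicate scheme as the
// packed-int rules above, using the byte forms vpcmpeqb/vpcmpgtb. The
// 512-bit (64B) variants additionally require VM_Version::supports_avx512bw(),
// since the byte and word forms of the EVEX compares and masked moves are
// AVX512BW instructions.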

instruct vcmpeq8B(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqb $dst,$src1,$src2\t! cmpeq packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16B(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqb $dst,$src1,$src2\t! cmpeq packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq32B(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqb $dst,$src1,$src2\t! cmpeq packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8B(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16B(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt32B(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8B(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16B(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt32B(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnleb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
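
// Packed short compares, using the word forms vpcmpeqw/vpcmpgtw and, for
// the 512-bit (32S) variants, the EVEX evpcmpw/evmovdquw pair.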

instruct vcmpeq4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\t! cmpeq packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\t! cmpeq packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\t! cmpeq packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnlew k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmplew k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
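
// Packed long compares, using vpcmpeqq and the signed vpcmpgtq together
// with the same swap/complement scheme as above for lt/ge/le/ne.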
instruct vcmpeq1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\t! cmpeq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\t! cmpeq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\t! cmpeq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF\t! cmpeq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
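// cmplt: there is no direct packed less-than compare, so a < b is
// computed as b > a by swapping the operands of vpcmpgtq.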
instruct vcmplt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF\t! cmplt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
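// cmpgt: vpcmpgtq is the one signed long compare AVX provides
// directly, so these rules map straight onto it.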
instruct vcmpgt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF\t! cmpgt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
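// cmpge = NOT(lt): compare with swapped operands, then invert the
// result with an all-ones XOR.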
instruct vcmpge1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF\t! cmpge packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
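// cmple = NOT(gt): vpcmpgtq followed by an all-ones XOR.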
instruct vcmple1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF\t! cmple packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
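// cmpne = NOT(eq): vpcmpeqq followed by an all-ones XOR; the AVX-512
// rule uses the native neq predicate instead.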
instruct vcmpne1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF\t! cmpneq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
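// Vector blends (VectorBlend): dst[i] = mask[i] ? src2[i] : src1[i].
// The SSE4.1 forms (blendvps/blendvpd/pblendvb) read the blend mask
// implicitly from xmm0, which is why those rules pin the mask operand
// with rxmm0 and match "(Binary dst src)". The AVX forms take an
// explicit mask register, and the AVX-512 forms below first convert
// the mask vector into a k register.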
instruct blendvps2F(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvps $dst,$src,$mask\t! packed2F" %}
  ins_encode %{
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps2F(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvps4F(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvps $dst,$src,$mask\t! packed4F" %}
  ins_encode %{
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps4F(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps8F(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps16F(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t"
            "vblendmps $dst,k2,$src1,$src2\t! blend packed16F" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpeqd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
    __ evblendmps($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd8D(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vblendmpd $dst,k2,$src1,$src2\t! blend packed8D" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evblendmpd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
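// AVX-512 blends: a true lane in the mask vector is all ones, so
// comparing the mask against the all-ones constant yields the
// equivalent k-register mask, which vpblendm*/vblendm* then applies
// per lane. k2 is hardcoded because the register allocator does not
// manage k registers; the byte and short forms also require AVX512BW.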
instruct vpblendmb64B(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqb k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmb $dst,k2,$src1,$src2\t! blend packed64B" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpb(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmb($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmw32S(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqw k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmw $dst,k2,$src1,$src2\t! blend packed32S" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpw(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmw($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmd16I(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmd $dst,k2,$src1,$src2\t! blend packed16I" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmq8L(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmq $dst,k2,$src1,$src2\t! blend packed8L" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmq($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
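// pblendvb selects each byte from the second operand when the high bit
// of the corresponding mask byte in xmm0 is set. Vector masks are
// all-ones or all-zeros per lane, so the byte-granular blend is also
// correct for the int, short and long elements handled below.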
instruct pblendvb2I(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed2I" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb2I(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4I(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed4I" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4I(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8I(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb8B(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed8B" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8B(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct pblendvb16B(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed16B" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb16B(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb32B(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4S(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed4S" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4S(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb8S(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed8S" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8S(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vpblendvb16S(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb1L(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed1L" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb1L(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb2L(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "pblendvb $dst,$src,$mask\t! blend packed2L" %}
  ins_encode %{
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb2L(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4L(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvpd1D(vecD dst, vecD src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvpd $dst,$src,$mask\t! packed1D" %}
  ins_encode %{
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct vblendvpd1D(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed1D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvpd2D(vecX dst, vecX src, rxmm0 mask) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "blendvpd $dst,$src,$mask\t! packed2D" %}
  ins_encode %{
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd2D(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd4D(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- NEG --------------------------------------
// a = -a
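// x86 has no packed integer negate instruction, so integer NEG is
// computed as 0 - src: zero a register with pxor, then psub the source
// from it. Floating-point NEG instead flips the sign bit by XOR-ing
// with a sign-flip mask.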
instruct vneg2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 2);
  match(Set dst (NegVI src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubd $dst,$src\t! neg packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 4);
  match(Set dst (NegVI src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubd $dst,$src\t! neg packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8I_reg(vecY dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (NegVI src));
  effect(TEMP tmp);
  format %{ "vpxor $tmp,$tmp,$tmp\n\t"
            "vpsubd $dst,$tmp,$src\t! neg packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg16I_reg(vecZ dst, vecZ src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (NegVI src));
  effect(TEMP tmp);
  format %{ "vpxor $tmp,$tmp,$tmp\n\t"
            "vpsubd $dst,$tmp,$src\t! neg packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg1D(regD dst) %{
  predicate((UseSSE >= 2) && (UseAVX == 0));
  match(Set dst (NegVD dst));
  ins_cost(150);
  format %{ "xorpd $dst,[0x8000000000000000]\t# $dst = -$dst neg packed1D" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg1D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1);
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed1D" %}
  ins_cost(150);
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg2D_reg(vecX dst) %{
  predicate(UseSSE >= 2);
  match(Set dst (NegVD dst));
  ins_cost(150);
  format %{ "xorpd $dst,[0x8000000000000000]\t# $dst = -$dst neg packed2D" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(vector_double_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg2F_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (NegVF dst));
  format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed2F" %}
  ins_cost(150);
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip()));
  %}
  ins_pipe( pipe_slow );
%}
instruct vneg4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (NegVF dst));
  format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (NegVF src));
  format %{ "vxorps $dst,$src\t# $dst = -$src neg packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (NegVF src));
  format %{ "vxorps $dst,$src\t# $dst = -$src neg packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
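// Integer ABS uses the SSSE3 pabsb/pabsw/pabsd family; 64-bit lanes
// need the AVX-512 evpabsq. Floating-point ABS clears the sign bit by
// AND-ing with a sign mask.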
instruct vabs8B_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %}
  ins_cost(150);
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16B_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %}
  ins_cost(150);
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32B_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs64B_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "evpabsb $dst,$src\t# $dst = |$src| abs packed64B" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4S_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %}
  ins_cost(150);
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8S_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %}
  ins_cost(150);
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16S_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32S_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "evpabsw $dst,$src\t# $dst = |$src| abs packed32S" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8I_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16I_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "evpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2L_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsV src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
"evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %} 16911 ins_cost(150); 16912 ins_encode %{ 16913 int vector_len = 1; 16914 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 16915 %} 16916 ins_pipe( pipe_slow ); 16917 %} 16918 16919 instruct vabs8L_reg(vecZ dst, vecZ src) %{ 16920 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 16921 match(Set dst (AbsV src)); 16922 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %} 16923 ins_cost(150); 16924 ins_encode %{ 16925 int vector_len = 2; 16926 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 16927 %} 16928 ins_pipe( pipe_slow ); 16929 %} 16930 16931 instruct vabs1D_reg(vecD dst) %{ 16932 predicate(UseSSE > 0 && n->as_Vector()->length() == 1); 16933 match(Set dst (AbsVD dst)); 16934 format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed1D" %} 16935 ins_cost(150); 16936 ins_encode %{ 16937 __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask())); 16938 %} 16939 ins_pipe( pipe_slow ); 16940 %} 16941 16942 instruct vabs2D_reg(vecX dst) %{ 16943 predicate(UseSSE > 0 && n->as_Vector()->length() == 2); 16944 match(Set dst (AbsVD dst)); 16945 format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed2D" %} 16946 ins_cost(150); 16947 ins_encode %{ 16948 __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask())); 16949 %} 16950 ins_pipe( pipe_slow ); 16951 %} 16952 16953 instruct vabs4D_reg(vecY dst, vecY src) %{ 16954 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 16955 match(Set dst (AbsVD src)); 16956 format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed4D" %} 16957 ins_cost(150); 16958 ins_encode %{ 16959 int vector_len = 1; 16960 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len); 16961 %} 16962 ins_pipe( pipe_slow ); 16963 %} 16964 16965 instruct vabs8D_reg(vecZ dst, vecZ src) %{ 16966 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 16967 match(Set dst (AbsVD src)); 16968 format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed8D" %} 16969 ins_cost(150); 16970 ins_encode %{ 16971 int vector_len = 2; 16972 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len); 16973 %} 16974 ins_pipe( pipe_slow ); 16975 %} 16976 16977 instruct vabs2F_reg(vecD dst) %{ 16978 predicate(UseSSE > 0 && n->as_Vector()->length() == 2); 16979 match(Set dst (AbsVF dst)); 16980 format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed2F" %} 16981 ins_cost(150); 16982 ins_encode %{ 16983 __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask())); 16984 %} 16985 ins_pipe( pipe_slow ); 16986 %} 16987 16988 instruct vabs4F_reg(vecX dst) %{ 16989 predicate(UseSSE > 0 && n->as_Vector()->length() == 4); 16990 match(Set dst (AbsVF dst)); 16991 format %{ "vandps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed4F" %} 16992 ins_cost(150); 16993 ins_encode %{ 16994 __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask())); 16995 %} 16996 ins_pipe( pipe_slow ); 16997 %} 16998 16999 instruct vabs8F_reg(vecY dst, vecY src) %{ 17000 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 17001 match(Set dst (AbsVF src)); 17002 format %{ "vandps $dst,$src\t# $dst = |$src| abs packed8F" %} 17003 ins_cost(150); 17004 ins_encode %{ 17005 int vector_len = 1; 17006 __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len); 17007 %} 17008 ins_pipe( 
instruct vabs8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- NOT --------------------------------------------
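// NotV is an XOR against the all-ones constant vector_all_bits_set():
// the SSE forms first load the constant into dst and pxor the source
// into it, while the AVX forms XOR the source with the constant in
// memory, using the scratch register to reach the constant when needed.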
instruct vnot4B(vecS dst, vecS src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot4B_reg(vecS dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B_reg(vecD dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B_reg(vecX dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot32B_reg(vecY dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot64B_reg(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF\t! not vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
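// VectorTest: vptest sets ZF when (src1 & src2) == 0 and CF when
// (src2 & ~src1) == 0. As used here, carrySet answers "all lanes true"
// and notZero answers "any lane true"; setb materializes the chosen
// flag and movzbl zero-extends it into the int result.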

instruct loadmask8b(vecD dst, vecD src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (8B to 8B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16b(vecX dst, vecX src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (16B to 16B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32b(vecY dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask64b(vecZ dst, vecZ src) %{
  predicate(UseAVX > 0 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
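
// VectorLoadMask converts a vector of boolean bytes (0 or 1) into a lane
// mask (0 or -1) without branches: zero the destination, then subtract the
// input, since 0 - 1 == -1 (0xFF) and 0 - 0 == 0. Scalar Java sketch of the
// per-lane semantics (illustrative only):
//
//   static byte loadMaskLane(byte b) {   // b is 0 or 1
//     return (byte) (0 - b);             // 0 -> 0x00, 1 -> 0xFF
//   }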

instruct loadmask4s(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (4B to 4S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8s(vecX dst, vecD src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (8B to 8S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16s(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (16B to 16S)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32s(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (32B to 32S)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 1);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2i(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (2B to 2I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4i(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (4B to 4I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8i(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbd $dst\t! load mask (8B to 8I)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16i(vecZ dst, vecX src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbd $tmp,$src\n\t"
            "vpsubd $dst,$tmp\t! load mask (16B to 16I)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbd($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask1l(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (1B to 1L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2l(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (2B to 2L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4l(vecY dst, vecS src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbq $dst\t! load mask (4B to 4L)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8l(vecZ dst, vecD src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbq $tmp,$src\n\t"
            "vpsubq $dst,$tmp\t! load mask (8B to 8L)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbq($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
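
// The widening VectorLoadMask variants build the same 0/-1 byte lanes and
// then sign-extend them (pmovsxbw/pmovsxbd/pmovsxbq) so -1 stays all-ones at
// the wider element width; the 512-bit forms instead zero-extend the input
// first and subtract at the target width. Scalar Java sketch of the
// byte-to-long case (illustrative only):
//
//   static long loadMaskLaneToLong(byte b) {   // b is 0 or 1
//     return (byte) (0 - b);                   // sign-extends 0xFF to all-ones
//   }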

instruct storemask8b(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (8B to 8B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16b(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (16B to 16B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask32b(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask64b(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqb k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpb(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask4s(vecS dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsw $dst,$src\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4S to 4B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8s(vecD dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsw $dst,$src\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (8S to 8B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16s(vecX dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpabsw $dst,$src\n\t"
            "vextracti128 $tmp,$dst\n\t"
            "vpackuswb $dst,$dst,$tmp\t! store mask (16S to 16B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask32s(vecY dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqw k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (32S to 32B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpw(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    // The dst is 256-bit - thus we can do a smaller move.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 1, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask2i(vecS dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (2I to 2B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask4i(vecS dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4I to 4B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8i(vecD dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubd $dst,$src\n\t"
            "vextracti128 $tmp,$dst\n\t"
            "vpackusdw $dst,$dst,$tmp\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (8I to 8B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16i(vecX dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (16I to 16B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpeqd(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
    // The dst is only 128-bit - thus we can do a smaller move.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 0, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask1l(vecS dst, vecD src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsd $dst,$src\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (1L to 1B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask2l(vecS dst, vecX src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  format %{ "vpshufd $dst,$src,0x8\n\t"
            "vpabsd $dst,$dst\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (2L to 2B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8, vector_len);
    __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask4l(vecS dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch, TEMP dst);
  format %{ "vmovdqu $dst,[0,2,4,6,1,3,5,7]\n\t"
            "vpermd $dst,$dst,$src\n\t"
            "vpabsd $dst,$dst\n\t"
            "vpackusdw $dst,$dst,$dst\n\t"
            "vpackuswb $dst,$dst,$dst\t! store mask (4L to 4B)" %}
  ins_encode %{
    // vpermd and load are 256-bit, but all others are 128-bit instructions.
    int vector_len = 0;
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_long_perm_mask()), $scratch$$Register);
    __ vpermd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister);
    __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask8l(vecD dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8);
  match(Set dst (VectorStoreMask src));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$src,0xFFFFFFFF\n\t"
            "vmovdqub $dst,k2,0x01010101\t! store mask (8L to 8B)" %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    Assembler::ComparisonPredicate cp = Assembler::eq;
    __ evpcmpq(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register);
    // The dst is only 128-bit - thus we can do a smaller move.
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), true, 0, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}
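
// VectorStoreMask is the inverse of VectorLoadMask: 0/-1 lanes are narrowed
// back to boolean bytes holding 0/1. vpabsb maps -1 to 1 in place; the wider
// variants narrow with vpackusdw/vpackuswb (or vpshufd/vpermd for long
// lanes), and the AVX-512 variants compare against all-ones into a k mask
// and then do a masked load of 0x01 bytes. Scalar Java sketch of the
// per-lane semantics (illustrative only):
//
//   static byte storeMaskLane(long m) {   // m is 0 or -1
//     return (byte) (m & 1);              // -1 -> 1, 0 -> 0
//   }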

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
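
// FmaVD/FmaVF nodes come from vectorized Math.fma calls; the match rules
// above read and write $c in place, mirroring the three-operand x86 FMA
// forms. A Java loop of the shape below can be auto-vectorized into these
// rules (a sketch; it assumes UseFMA and a superword-friendly loop):
//
//   static void fma(double[] a, double[] b, double[] c) {
//     for (int i = 0; i < c.length; i++) {
//       c[i] = Math.fma(a[i], b[i], c[i]);   // a * b + c with a single rounding
//     }
//   }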

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
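
// PopCountVI is generated for vectorized Integer.bitCount loops and is only
// used when the AVX512_VPOPCNTDQ extension is present (supports_vpopcntdq())
// and UsePopCountInstruction is on. A Java loop of this shape can vectorize
// into the rules above (a sketch under those assumptions):
//
//   static void popcount(int[] src, int[] dst) {
//     for (int i = 0; i < dst.length; i++) {
//       dst[i] = Integer.bitCount(src[i]);   // one vpopcntd lane per element
//     }
//   }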