1 // 2 // Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 512-bit registers or 16 words each, labeled (a)-p. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // For pre EVEX enabled architectures: 68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX) 69 // For EVEX enabled architectures: 70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX). 
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, 
Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, 
SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( 
SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def 
XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, 
xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, 
xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, 
SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, 
xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, 
SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 
435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, 
xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

// XMM23-XMM31: upper-bank 512-bit registers, one reg_def per 32-bit word
// (a..p); available only with EVEX encoding (see header comment above).
reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

// Condition-code (flags) register. Save-on-call; not directly addressable
// (VMRegImpl::Bad()); the encoding differs between 64-bit and 32-bit builds.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation class: every XMM register word, in allocation order.
// XMM0-XMM7 are always present; XMM8-XMM15 and the EVEX-only XMM16-XMM31
// exist only on 64-bit builds.
alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
// RFLAGS gets its own allocation class (after chunk1 above).
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

// Selects the evex or legacy class at runtime based on EVEX support.
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

// Singleton register classes: each XMM register at 128-bit (xmmN_reg),
// 256-bit (ymmN_reg) and 512-bit (zmmN_reg) width.
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);

reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d);
reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h);
reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p);

reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d);
reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h);
reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p);

reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d);
reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h);
reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p);

reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d);
reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h);
reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p);

reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d);
reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h);
reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p);

reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d);
reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h);
reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p);

reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d);
reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h);
reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p);

#ifdef _LP64

reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d);
// Singleton register classes for XMM8-XMM31 (64-bit builds only),
// continuing the xmmN/ymmN/zmmN pattern established for XMM0-XMM7 above.
reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h);
reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p);

reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d);
reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h);
reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p);

reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d);
reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h);
reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p);

reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d);
reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h);
reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p);

reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d);
reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h);
reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p);

reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d);
reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h);
reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p);

reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d);
reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h);
reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p);

reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d);
reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);

reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d);
reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h);
reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p);

reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d);
reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h);
reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p);

reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d);
reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h);
reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p);

reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d);
reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h);
reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p);

reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d);
reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h);
reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p);

reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d);
reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h);
reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p);

reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d);
reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h);
reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p);

reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d);
reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h);
reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p);

reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d);
reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h);
reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p);

reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d);
reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h);
reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p);

reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d);
reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h);
reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);

reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d);
reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

// Forward declaration; the full definition lives in the runtime
// (nativeInst_x86) — only instruction_size is used below.
class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---< Used for optimization in Compile::shorten_branches >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

// Sizes and emitters for the exception and deoptimization handler
// stubs appended to each compiled method (implementations are in the
// source %{ %} block below).
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
// Emits the exception handler stub: a single jump into the shared
// exception blob.  Returns the stub's offset in the code buffer, or 0
// if the code cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
// Pushes the current pc (so the deopt blob can find the deopt point)
// and jumps to the shared unpack routine.  Returns the stub's offset,
// or 0 if the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
// On 64-bit they are generated stubs (StubRoutines::x86); on 32-bit
// they are static constant pools defined elsewhere in the VM.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_byte_bitset() { return StubRoutines::x86::vector_byte_bitset(); }
static address vector_long_perm_mask() { return StubRoutines::x86::vector_long_perm_mask(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_all_ones_mask() { return StubRoutines::x86::vector_all_ones_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_int_sizemask() { return StubRoutines::x86::vector_int_size_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_short_sizemask() { return StubRoutines::x86::vector_short_size_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_long_sizemask() { return StubRoutines::x86::vector_long_size_mask(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


// Returns true if C2 may emit a match rule for 'opcode' on this CPU,
// based on the feature flags (SSE/AVX level, popcount, cx8, ...).
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
    case Op_MulVL:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value;  // Per default match rules are supported.
}

// Vector-aware refinement of match_rule_supported():
//   vlen     - number of vector lanes
//   bt       - lane basic type
//   op_arity - operand arity (e.g. 2 selects the variable-shift forms)
// Rejects combinations the hardware or the current implementation
// cannot handle (vector width vs. AVX level, subword types without
// AVX512BW, etc.).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt, int op_arity) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
    if (!vector_size_supported(bt, vlen)) {
      ret_value = false;
    } else if (size_in_bits > 256 && UseAVX <= 2) {
      // Only AVX512 supports 512-bit vectors
      ret_value = false;
    } else if (UseAVX == 0 && size_in_bits > 128) {
      // Only AVX supports 256-bit vectors
      ret_value = false;
    } else if (is_subword_type(bt) && size_in_bits == 512 && VM_Version::supports_avx512bw() == false) {
      // Byte and Short types are not supported in AVX512 if AVX512BW is not true.
      ret_value = false;
    } else {
      switch (opcode) {
        case Op_AbsV:
          if (is_integral_type(bt) && UseSSE < 3) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          else if (bt == T_LONG && UseAVX <= 2) { ret_value = false; } // Implementation limitation
          break;
        case Op_AddVB:
        case Op_SubVB:
          if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
            ret_value = false;
          break;
        case Op_MaxV:
        case Op_MinV:
          if (UseSSE < 4 && (bt == T_BYTE || bt == T_INT || bt == T_LONG))
            ret_value = false;
          break;
        case Op_MulVB:
          if (size_in_bits <= 128 && UseSSE < 4) { ret_value = false; }
          else if (size_in_bits > 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_LShiftVI:
        case Op_RShiftVI:
        case Op_URShiftVI:
          if (op_arity == 2 && UseAVX <= 1)
            ret_value = false;
          break;
        case Op_LShiftVL:
        case Op_RShiftVL:
        case Op_URShiftVL:
          if (op_arity == 2 && UseAVX <= 1)
            ret_value = false;
          break;
        case Op_URShiftVS:
        case Op_RShiftVS:
        case Op_LShiftVS:
        case Op_MulVS:
        case Op_AddVS:
        case Op_SubVS:
          if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
            ret_value = false;
          break;
        case Op_CMoveVF:
          if (vlen != 8)
            ret_value = false;
          break;
        case Op_CMoveVD:
          if (vlen != 4)
            ret_value = false;
          break;
        case Op_AddReductionVI:
          if (bt == T_INT && UseSSE < 3) { ret_value = false; }
          else if (is_subword_type(bt) && UseSSE <= 3) { ret_value = false; }
          break;
        case Op_AndReductionV:
        case Op_OrReductionV:
        case Op_XorReductionV:
          if (bt == T_BYTE && UseSSE <= 3) { ret_value = false; }
          break;
        case Op_VectorMaskCmp:
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_MinReductionV:
        case Op_MaxReductionV:
          if ((bt == T_INT || bt == T_LONG || bt == T_BYTE) && UseSSE <= 3) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorBlend:
          if (UseSSE <= 3 && UseAVX == 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorTest:
          if (UseAVX <= 0) { ret_value = false; }
          else if (size_in_bits != 128 && size_in_bits != 256) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorLoadMask:
          if (UseSSE <= 3) { ret_value = false; }
          else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation
          else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorLoadShuffle:
        case Op_VectorRearrange:
          if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation due to how shuffle is loaded
          else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } // Implementation limitation
          else if (bt == T_BYTE && size_in_bits >= 256 && !VM_Version::supports_avx512vbmi()) { ret_value = false; } // Implementation limitation
          else if (bt == T_SHORT && size_in_bits >= 256 && !VM_Version::supports_avx512vlbw()) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorStoreMask:
          // NOTE(review): "UseAVX < 0" can never be true since the minimum
          // flag value is 0; presumably "UseAVX <= 0" was intended — verify
          // before relying on this guard.
          if (UseAVX < 0) { ret_value = false; } // Implementation limitation
          else if ((size_in_bits >= 256 || bt == T_LONG || bt == T_DOUBLE) && UseAVX < 2) { ret_value = false; } // Implementation limitation
          else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation
          else if (size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorCastB2X:
          if (UseAVX <= 0) { ret_value = false; }
          else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorCastS2X:
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          else if (is_integral_type(bt) && vlen * type2aelembytes(T_SHORT) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorCastI2X:
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          else if (is_integral_type(bt) && vlen * type2aelembytes(T_INT) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorCastL2X:
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          else if (is_integral_type(bt) && vlen * type2aelembytes(T_LONG) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; }
          else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { ret_value = false; }
          break;
        case Op_VectorCastF2X:
          // Casts from FP to integral types require special fixup logic not easily
          // implementable with vectors.
          if (UseAVX <= 0) { ret_value = false; }
          else if (bt != T_DOUBLE) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorCastD2X:
          // Casts from FP to integral types require special fixup logic not easily
          // implementable with vectors.
          if (UseAVX <= 0) { ret_value = false; }
          else if (bt != T_FLOAT) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorReinterpret:
          if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_MulReductionVI:
          if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; }
          break;
        default:
          break;
      }
    }
  }
  if (ret_value) {
    assert(is_java_primitive(bt) && (vlen > 0) && is_power_of_2(vlen) &&
           vector_size_supported(bt, vlen), "must be supported");
  }

  return ret_value;  // Per default match rules are supported.
}

// True when EVEX masked (predicated) vector operations are usable,
// i.e. AVX-512 with the VL extension.
const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX512/EVEX supports 512bit vectors for all types.
  // (1 << UseAVX) * 8 yields 32 bytes for UseAVX == 2, 64 for UseAVX == 3.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well.
// If 'shift' is a left shift by a constant <= 3 that is only used in
// address expressions, mark it (and, on LP64, a provably-non-negative
// ConvI2L feeding it) as address_visited so the matcher can fold it
// into a scaled-index addressing mode.  Returns true if cloned.
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypass
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

// No platform-specific address reshaping on x86.
void Compile::reshape_address(AddPNode* addp) {
}

// Helper methods for MachSpillCopyNode::implementation().

// Emits (or sizes, or prints) an XMM register-to-register move of the
// given ideal vector width.  Returns the emitted size in bytes.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

// Emits (or sizes, or prints) a vector spill: XMM register <-> stack
// slot at rsp+stack_offset, for the given ideal vector width.
// Returns the emitted (or computed) size in bytes.
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  // Compute the encoded size without emitting: base opcode bytes plus
  // displacement bytes, plus EVEX prefix overhead where applicable.
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2 ) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Unsigned immediate operands: immUn matches only int constants in
// [0, 2^n), letting the matcher pick instruction forms whose immediate
// field is n bits wide.
operand immU1() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU2() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU3() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU4() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU5() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(5));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU6() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(6));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// This one generically applies only for evex, so only one version
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move
operand cmpOp_vcmppd() %{
  match(Bool);

  // overflow/no_overflow have no vcmppd predicate encoding; exclude them
  // from matching (they remain listed below only to satisfy the ADLC).
  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without two next lines with error:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
    overflow     (0x20, "o");  // not really supported by the instruction
    no_overflow  (0x21, "no"); // not really supported by the instruction
  %}
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// Emit ud2: raises #UD, so execution must never actually reach a Halt node.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "ud2\t# ShouldNotReachHere" %}
  ins_encode %{
    __ ud2();
  %}
  ins_pipe(pipe_slow);
%}

// =================================EVEX special===============================

// Set the AVX-512 vector mask register from an int (predicated-vector CPUs).
instruct setMask(rRegI dst, rRegI src) %{
  predicate(Matcher::has_predicated_vectors());
  match(Set dst (SetVectMaskI src));
  effect(TEMP dst);
  format %{ "setvectmask $dst, $src" %}
  ins_encode %{
    __ setvectmask($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// Scalar float add -- SSE two-operand (destructive) forms, AVX disabled.
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Float-constant operand is materialized via the constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float add -- AVX three-operand (non-destructive) forms.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double add -- SSE2 two-operand forms, AVX disabled.
instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Double-constant operand is materialized via the constant table.
instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double add -- AVX three-operand forms.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float subtract -- SSE two-operand forms, AVX disabled.
instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float subtract -- AVX three-operand forms.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double subtract -- SSE2 two-operand forms, AVX disabled.
instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double subtract -- AVX three-operand forms.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float multiply -- SSE two-operand forms, AVX disabled.
instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float multiply -- AVX three-operand forms.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double multiply -- SSE2 two-operand forms, AVX disabled.
instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double multiply -- AVX three-operand forms.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float divide -- SSE two-operand forms, AVX disabled.
instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float divide -- AVX three-operand forms.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double divide -- SSE2 two-operand forms, AVX disabled.
instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double divide -- AVX three-operand forms.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Float abs: clear the sign bit by ANDing with 0x7fffffff (SSE form).
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
predicate(VM_Version::supports_avxonly()); 2749 match(Set dst (AbsF src)); 2750 ins_cost(150); 2751 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2752 ins_encode %{ 2753 int vector_len = 0; 2754 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2755 ExternalAddress(float_signmask()), vector_len); 2756 %} 2757 ins_pipe(pipe_slow); 2758 %} 2759 2760 #ifdef _LP64 2761 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2762 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2763 match(Set dst (AbsF src)); 2764 ins_cost(150); 2765 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2766 ins_encode %{ 2767 int vector_len = 0; 2768 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2769 ExternalAddress(float_signmask()), vector_len); 2770 %} 2771 ins_pipe(pipe_slow); 2772 %} 2773 2774 instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{ 2775 predicate(VM_Version::supports_avx512novl()); 2776 match(Set dst (AbsF src1)); 2777 effect(TEMP src2); 2778 ins_cost(150); 2779 format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %} 2780 ins_encode %{ 2781 int vector_len = 0; 2782 __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2783 ExternalAddress(float_signmask()), vector_len); 2784 %} 2785 ins_pipe(pipe_slow); 2786 %} 2787 #else // _LP64 2788 instruct absF_reg_reg_evex(regF dst, regF src) %{ 2789 predicate(UseAVX > 2); 2790 match(Set dst (AbsF src)); 2791 ins_cost(150); 2792 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 2793 ins_encode %{ 2794 int vector_len = 0; 2795 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 2796 ExternalAddress(float_signmask()), vector_len); 2797 %} 2798 ins_pipe(pipe_slow); 2799 %} 2800 #endif 2801 2802 instruct absD_reg(regD dst) %{ 2803 predicate((UseSSE>=2) && (UseAVX == 0)); 2804 match(Set dst (AbsD dst)); 2805 ins_cost(150); 2806 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 2807 "# abs double by 
sign masking" %} 2808 ins_encode %{ 2809 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 2810 %} 2811 ins_pipe(pipe_slow); 2812 %} 2813 2814 instruct absD_reg_reg(regD dst, regD src) %{ 2815 predicate(VM_Version::supports_avxonly()); 2816 match(Set dst (AbsD src)); 2817 ins_cost(150); 2818 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2819 "# abs double by sign masking" %} 2820 ins_encode %{ 2821 int vector_len = 0; 2822 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2823 ExternalAddress(double_signmask()), vector_len); 2824 %} 2825 ins_pipe(pipe_slow); 2826 %} 2827 2828 #ifdef _LP64 2829 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2830 predicate(UseAVX > 2 && VM_Version::supports_avx512vl()); 2831 match(Set dst (AbsD src)); 2832 ins_cost(150); 2833 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2834 "# abs double by sign masking" %} 2835 ins_encode %{ 2836 int vector_len = 0; 2837 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2838 ExternalAddress(double_signmask()), vector_len); 2839 %} 2840 ins_pipe(pipe_slow); 2841 %} 2842 2843 instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{ 2844 predicate(VM_Version::supports_avx512novl()); 2845 match(Set dst (AbsD src1)); 2846 effect(TEMP src2); 2847 ins_cost(150); 2848 format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %} 2849 ins_encode %{ 2850 int vector_len = 0; 2851 __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2852 ExternalAddress(double_signmask()), vector_len); 2853 %} 2854 ins_pipe(pipe_slow); 2855 %} 2856 #else // _LP64 2857 instruct absD_reg_reg_evex(regD dst, regD src) %{ 2858 predicate(UseAVX > 2); 2859 match(Set dst (AbsD src)); 2860 ins_cost(150); 2861 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 2862 "# abs double by sign masking" %} 2863 ins_encode %{ 2864 int vector_len = 0; 2865 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 2866 ExternalAddress(double_signmask()), 
vector_len); 2867 %} 2868 ins_pipe(pipe_slow); 2869 %} 2870 #endif 2871 2872 instruct negI_rReg_2(rRegI dst, rFlagsReg cr) 2873 %{ 2874 match(Set dst (NegI dst)); 2875 effect(KILL cr); 2876 2877 format %{ "negl $dst\t# int" %} 2878 ins_encode %{ 2879 __ negl($dst$$Register); 2880 %} 2881 ins_pipe(ialu_reg); 2882 %} 2883 2884 instruct negL_rReg_2(rRegL dst, rFlagsReg cr) 2885 %{ 2886 match(Set dst (NegL dst)); 2887 effect(KILL cr); 2888 2889 format %{ "negq $dst\t# int" %} 2890 ins_encode %{ 2891 __ negq($dst$$Register); 2892 %} 2893 ins_pipe(ialu_reg); 2894 %} 2895 2896 instruct negF_reg(regF dst) %{ 2897 predicate((UseSSE>=1) && (UseAVX == 0)); 2898 match(Set dst (NegF dst)); 2899 ins_cost(150); 2900 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 2901 ins_encode %{ 2902 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 2903 %} 2904 ins_pipe(pipe_slow); 2905 %} 2906 2907 instruct negF_reg_reg(regF dst, regF src) %{ 2908 predicate(UseAVX > 0); 2909 match(Set dst (NegF src)); 2910 ins_cost(150); 2911 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 2912 ins_encode %{ 2913 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 2914 ExternalAddress(float_signflip())); 2915 %} 2916 ins_pipe(pipe_slow); 2917 %} 2918 2919 instruct negD_reg(regD dst) %{ 2920 predicate((UseSSE>=2) && (UseAVX == 0)); 2921 match(Set dst (NegD dst)); 2922 ins_cost(150); 2923 format %{ "xorpd $dst, [0x8000000000000000]\t" 2924 "# neg double by sign flipping" %} 2925 ins_encode %{ 2926 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 2927 %} 2928 ins_pipe(pipe_slow); 2929 %} 2930 2931 instruct negD_reg_reg(regD dst, regD src) %{ 2932 predicate(UseAVX > 0); 2933 match(Set dst (NegD src)); 2934 ins_cost(150); 2935 format %{ "vnegatess $dst, $src, [0x8000000000000000]\t" 2936 "# neg double by sign flipping" %} 2937 ins_encode %{ 2938 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 2939 
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float square root (SSE sqrtss); also valid when AVX is on.
instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar double square root (SSE2 sqrtsd).
instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"pause\t! membar_onspinwait"
    } else {
      $$emit$$"MEMBAR-onspinwait ! (empty encoding)"
    }
  %}
  ins_encode %{
    // pause hints the CPU that this is a spin-wait loop.
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
// Fused multiply-add, double: c = a * b + c in one rounding step.
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
// Fused multiply-add, float.
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR INSTRUCTIONS=====================================

// 4-byte -> 4-byte reinterpret in place: no code needed.
instruct reinterpretS(vecS dst) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

// Widen 4-byte vector to 8 bytes (SSE): mask keeps the low 32 bits,
// zeroing the upper lanes of $dst.
instruct reinterpretS2D(vecD dst, vecS src, rRegL scratch) %{
  predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Widen 4-byte vector to 8 bytes (AVX): single masked vpand.
instruct reinterpretS2D_avx(vecD dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Widen 4-byte vector to 16 bytes (SSE).
instruct reinterpretS2X(vecX dst, vecS src, rRegL scratch) %{
  predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Widen 4-byte vector to 16 bytes (AVX).
instruct reinterpretS2X_avx(vecX dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Widen 4-byte vector to 32 bytes (AVX2, 256-bit vpand).
instruct reinterpretS2Y(vecY dst, vecS src, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Widen 4-byte vector to 64 bytes (EVEX, 512-bit vpand).
instruct reinterpretS2Z(vecZ dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Narrow 8-byte vector to 4 bytes: only the low lanes are observed, so a
// plain register move (or nothing) suffices.
instruct reinterpretD2S(vecS dst, vecD src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If register is the same, then move is not needed.
3155 if ($dst$$XMMRegister != $src$$XMMRegister) { 3156 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3157 } 3158 %} 3159 ins_pipe( pipe_slow ); 3160 %} 3161 3162 instruct reinterpretD(vecD dst) %{ 3163 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 3164 match(Set dst (VectorReinterpret dst)); 3165 ins_cost(125); 3166 format %{ " # reinterpret $dst" %} 3167 ins_encode %{ 3168 // empty 3169 %} 3170 ins_pipe( pipe_slow ); 3171 %} 3172 3173 instruct reinterpretD2X(vecX dst, vecD src, rRegL scratch) %{ 3174 predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 3175 match(Set dst (VectorReinterpret src)); 3176 ins_cost(125); 3177 effect(TEMP dst, TEMP scratch); 3178 format %{ " # reinterpret $dst,$src" %} 3179 ins_encode %{ 3180 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); 3181 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3182 %} 3183 ins_pipe( pipe_slow ); 3184 %} 3185 3186 instruct reinterpretD2X_avx(vecX dst, vecD src, rRegL scratch) %{ 3187 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 3188 match(Set dst (VectorReinterpret src)); 3189 ins_cost(125); 3190 effect(TEMP dst, TEMP scratch); 3191 format %{ " # reinterpret $dst,$src" %} 3192 ins_encode %{ 3193 int vector_len = 0; 3194 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); 3195 %} 3196 ins_pipe( pipe_slow ); 3197 %} 3198 3199 instruct reinterpretD2Y(vecY dst, vecD src, rRegL scratch) %{ 3200 predicate(UseAVX >= 2 && n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 3201 match(Set dst (VectorReinterpret src)); 3202 ins_cost(125); 3203 effect(TEMP scratch); 3204 format %{ " # 
reinterpret $dst,$src" %} 3205 ins_encode %{ 3206 int vector_len = 1; 3207 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); 3208 %} 3209 ins_pipe( pipe_slow ); 3210 %} 3211 3212 instruct reinterpretD2Z(vecZ dst, vecD src, rRegL scratch) %{ 3213 predicate(UseAVX > 2 && n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); 3214 match(Set dst (VectorReinterpret src)); 3215 ins_cost(125); 3216 effect(TEMP scratch); 3217 format %{ " # reinterpret $dst,$src" %} 3218 ins_encode %{ 3219 int vector_len = 2; 3220 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register); 3221 %} 3222 ins_pipe( pipe_slow ); 3223 %} 3224 3225 instruct reinterpretX2S(vecS dst, vecX src) %{ 3226 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3227 match(Set dst (VectorReinterpret src)); 3228 ins_cost(125); 3229 format %{ " # reinterpret $dst,$src" %} 3230 ins_encode %{ 3231 // If register is the same, then move is not needed. 3232 if ($dst$$XMMRegister != $src$$XMMRegister) { 3233 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3234 } 3235 %} 3236 ins_pipe( pipe_slow ); 3237 %} 3238 3239 instruct reinterpretX2D(vecD dst, vecX src) %{ 3240 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3241 match(Set dst (VectorReinterpret src)); 3242 ins_cost(125); 3243 format %{ " # reinterpret $dst,$src" %} 3244 ins_encode %{ 3245 // If register is the same, then move is not needed. 
3246 if ($dst$$XMMRegister != $src$$XMMRegister) { 3247 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3248 } 3249 %} 3250 ins_pipe( pipe_slow ); 3251 %} 3252 3253 instruct reinterpretX(vecX dst) %{ 3254 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3255 match(Set dst (VectorReinterpret dst)); 3256 ins_cost(125); 3257 format %{ " # reinterpret $dst" %} 3258 ins_encode %{ 3259 // empty 3260 %} 3261 ins_pipe( pipe_slow ); 3262 %} 3263 3264 instruct reinterpretX2Y(vecY dst, vecX src) %{ 3265 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3266 match(Set dst (VectorReinterpret src)); 3267 ins_cost(125); 3268 effect(TEMP dst); 3269 format %{ " # reinterpret $dst,$src" %} 3270 ins_encode %{ 3271 int vector_len = 1; 3272 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3273 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // just 128-bits need moved 3274 %} 3275 ins_pipe( pipe_slow ); 3276 %} 3277 3278 instruct reinterpretX2Z(vecZ dst, vecX src) %{ 3279 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3280 match(Set dst (VectorReinterpret src)); 3281 ins_cost(125); 3282 effect(TEMP dst); 3283 format %{ " # reinterpret $dst,$src" %} 3284 ins_encode %{ 3285 int vector_len = 2; 3286 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3287 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // just 128-bits need moved 3288 %} 3289 ins_pipe( pipe_slow ); 3290 %} 3291 3292 instruct reinterpretY2S(vecS dst, vecY src) %{ 3293 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3294 match(Set dst (VectorReinterpret src)); 3295 ins_cost(125); 3296 format %{ " # reinterpret $dst,$src" %} 3297 ins_encode %{ 3298 // If 
register is the same, then move is not needed. 3299 if ($dst$$XMMRegister != $src$$XMMRegister) { 3300 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3301 } 3302 %} 3303 ins_pipe( pipe_slow ); 3304 %} 3305 3306 instruct reinterpretY2D(vecD dst, vecY src) %{ 3307 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3308 match(Set dst (VectorReinterpret src)); 3309 ins_cost(125); 3310 format %{ " # reinterpret $dst,$src" %} 3311 ins_encode %{ 3312 // If register is the same, then move is not needed. 3313 if ($dst$$XMMRegister != $src$$XMMRegister) { 3314 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3315 } 3316 %} 3317 ins_pipe( pipe_slow ); 3318 %} 3319 3320 instruct reinterpretY2X(vecX dst, vecY src) %{ 3321 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3322 match(Set dst (VectorReinterpret src)); 3323 ins_cost(125); 3324 format %{ " # reinterpret $dst,$src" %} 3325 ins_encode %{ 3326 // If register is the same, then move is not needed. 
3327 if ($dst$$XMMRegister != $src$$XMMRegister) { 3328 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3329 } 3330 %} 3331 ins_pipe( pipe_slow ); 3332 %} 3333 3334 instruct reinterpretY(vecY dst) %{ 3335 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3336 match(Set dst (VectorReinterpret dst)); 3337 ins_cost(125); 3338 format %{ " # reinterpret $dst" %} 3339 ins_encode %{ 3340 // empty 3341 %} 3342 ins_pipe( pipe_slow ); 3343 %} 3344 3345 instruct reinterpretY2Z(vecZ dst, vecY src) %{ 3346 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3347 match(Set dst (VectorReinterpret src)); 3348 ins_cost(125); 3349 effect(TEMP dst); 3350 format %{ " # reinterpret $dst,$src" %} 3351 ins_encode %{ 3352 int vector_len = 2; 3353 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3354 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3355 %} 3356 ins_pipe( pipe_slow ); 3357 %} 3358 3359 instruct reinterpretZ2S(vecS dst, vecZ src) %{ 3360 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3361 match(Set dst (VectorReinterpret src)); 3362 ins_cost(125); 3363 format %{ " # reinterpret $dst,$src" %} 3364 ins_encode %{ 3365 // If register is the same, then move is not needed. 3366 if ($dst$$XMMRegister != $src$$XMMRegister) { 3367 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3368 } 3369 %} 3370 ins_pipe( pipe_slow ); 3371 %} 3372 3373 instruct reinterpretZ2D(vecD dst, vecZ src) %{ 3374 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3375 match(Set dst (VectorReinterpret src)); 3376 ins_cost(125); 3377 format %{ " # reinterpret $dst,$src" %} 3378 ins_encode %{ 3379 // If register is the same, then move is not needed. 
3380 if ($dst$$XMMRegister != $src$$XMMRegister) { 3381 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3382 } 3383 %} 3384 ins_pipe( pipe_slow ); 3385 %} 3386 3387 instruct reinterpretZ2X(vecX dst, vecZ src) %{ 3388 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3389 match(Set dst (VectorReinterpret src)); 3390 ins_cost(125); 3391 format %{ " # reinterpret $dst,$src" %} 3392 ins_encode %{ 3393 // If register is the same, then move is not needed. 3394 if ($dst$$XMMRegister != $src$$XMMRegister) { 3395 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3396 } 3397 %} 3398 ins_pipe( pipe_slow ); 3399 %} 3400 3401 instruct reinterpretZ2Y(vecY dst, vecZ src) %{ 3402 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3403 match(Set dst (VectorReinterpret src)); 3404 ins_cost(125); 3405 format %{ " # reinterpret $dst,$src" %} 3406 ins_encode %{ 3407 // If register is the same, then move is not needed. 3408 if ($dst$$XMMRegister != $src$$XMMRegister) { 3409 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3410 } 3411 %} 3412 ins_pipe( pipe_slow ); 3413 %} 3414 3415 instruct reinterpretZ(vecZ dst) %{ 3416 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3417 match(Set dst (VectorReinterpret dst)); 3418 ins_cost(125); 3419 format %{ " # reinterpret $dst" %} 3420 ins_encode %{ 3421 // empty 3422 %} 3423 ins_pipe( pipe_slow ); 3424 %} 3425 3426 // ========== 3427 3428 // Load vectors (1 byte long) 3429 instruct loadV1(vecS dst, memory mem, rRegI tmp) %{ 3430 predicate(n->as_LoadVector()->memory_size() == 1); 3431 match(Set dst (LoadVector mem)); 3432 ins_cost(125); 3433 effect(TEMP tmp); 3434 format %{ "movzbl $tmp,$mem\n\t" 3435 "movd $dst,$tmp\t! 
load vector (1 byte)" %} 3436 ins_encode %{ 3437 __ movzbl($tmp$$Register, $mem$$Address); 3438 __ movdl($dst$$XMMRegister, $tmp$$Register); 3439 %} 3440 ins_pipe( pipe_slow ); 3441 %} 3442 3443 // Load vectors (2 bytes long) 3444 instruct loadV2(vecS dst, memory mem, rRegI tmp) %{ 3445 predicate(n->as_LoadVector()->memory_size() == 2); 3446 match(Set dst (LoadVector mem)); 3447 ins_cost(125); 3448 effect(TEMP tmp); 3449 format %{ "movzwl $tmp,$mem\n\t" 3450 "movd $dst,$tmp\t! load vector (2 bytes)" %} 3451 ins_encode %{ 3452 __ movzwl($tmp$$Register, $mem$$Address); 3453 __ movdl($dst$$XMMRegister, $tmp$$Register); 3454 %} 3455 ins_pipe( pipe_slow ); 3456 %} 3457 3458 // Load vectors (4 bytes long) 3459 instruct loadV4(vecS dst, memory mem) %{ 3460 predicate(n->as_LoadVector()->memory_size() == 4); 3461 match(Set dst (LoadVector mem)); 3462 ins_cost(125); 3463 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 3464 ins_encode %{ 3465 __ movdl($dst$$XMMRegister, $mem$$Address); 3466 %} 3467 ins_pipe( pipe_slow ); 3468 %} 3469 3470 // Load vectors (8 bytes long) 3471 instruct loadV8(vecD dst, memory mem) %{ 3472 predicate(n->as_LoadVector()->memory_size() == 8); 3473 match(Set dst (LoadVector mem)); 3474 ins_cost(125); 3475 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 3476 ins_encode %{ 3477 __ movq($dst$$XMMRegister, $mem$$Address); 3478 %} 3479 ins_pipe( pipe_slow ); 3480 %} 3481 3482 // Load vectors (16 bytes long) 3483 instruct loadV16(vecX dst, memory mem) %{ 3484 predicate(n->as_LoadVector()->memory_size() == 16); 3485 match(Set dst (LoadVector mem)); 3486 ins_cost(125); 3487 format %{ "movdqu $dst,$mem\t! 
load vector (16 bytes)" %} 3488 ins_encode %{ 3489 __ movdqu($dst$$XMMRegister, $mem$$Address); 3490 %} 3491 ins_pipe( pipe_slow ); 3492 %} 3493 3494 // Load vectors (32 bytes long) 3495 instruct loadV32(vecY dst, memory mem) %{ 3496 predicate(n->as_LoadVector()->memory_size() == 32); 3497 match(Set dst (LoadVector mem)); 3498 ins_cost(125); 3499 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 3500 ins_encode %{ 3501 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 3502 %} 3503 ins_pipe( pipe_slow ); 3504 %} 3505 3506 // Load vectors (64 bytes long) 3507 instruct loadV64_dword(vecZ dst, memory mem) %{ 3508 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 3509 match(Set dst (LoadVector mem)); 3510 ins_cost(125); 3511 format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %} 3512 ins_encode %{ 3513 int vector_len = 2; 3514 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 3515 %} 3516 ins_pipe( pipe_slow ); 3517 %} 3518 3519 // Load vectors (64 bytes long) 3520 instruct loadV64_qword(vecZ dst, memory mem) %{ 3521 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 3522 match(Set dst (LoadVector mem)); 3523 ins_cost(125); 3524 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 3525 ins_encode %{ 3526 int vector_len = 2; 3527 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 3528 %} 3529 ins_pipe( pipe_slow ); 3530 %} 3531 3532 // Store vectors 3533 instruct storeV1(memory mem, vecS src, rRegI tmp) %{ 3534 predicate(n->as_StoreVector()->memory_size() == 1); 3535 match(Set mem (StoreVector mem src)); 3536 ins_cost(145); 3537 effect(TEMP tmp); 3538 format %{ "movd $tmp,$src\n\t" 3539 "movb $mem,$tmp\t! 
store vector (1 byte)" %} 3540 ins_encode %{ 3541 __ movdl($tmp$$Register, $src$$XMMRegister); 3542 __ movb($mem$$Address, $tmp$$Register); 3543 %} 3544 ins_pipe( pipe_slow ); 3545 %} 3546 3547 instruct storeV2(memory mem, vecS src, rRegI tmp) %{ 3548 predicate(n->as_StoreVector()->memory_size() == 2); 3549 match(Set mem (StoreVector mem src)); 3550 ins_cost(145); 3551 effect(TEMP tmp); 3552 format %{ "movd $tmp,$src\n\t" 3553 "movw $mem,$tmp\t! store vector (2 bytes)" %} 3554 ins_encode %{ 3555 __ movdl($tmp$$Register, $src$$XMMRegister); 3556 __ movw($mem$$Address, $tmp$$Register); 3557 %} 3558 ins_pipe( pipe_slow ); 3559 %} 3560 3561 instruct storeV4(memory mem, vecS src) %{ 3562 predicate(n->as_StoreVector()->memory_size() == 4); 3563 match(Set mem (StoreVector mem src)); 3564 ins_cost(145); 3565 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 3566 ins_encode %{ 3567 __ movdl($mem$$Address, $src$$XMMRegister); 3568 %} 3569 ins_pipe( pipe_slow ); 3570 %} 3571 3572 instruct storeV8(memory mem, vecD src) %{ 3573 predicate(n->as_StoreVector()->memory_size() == 8); 3574 match(Set mem (StoreVector mem src)); 3575 ins_cost(145); 3576 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 3577 ins_encode %{ 3578 __ movq($mem$$Address, $src$$XMMRegister); 3579 %} 3580 ins_pipe( pipe_slow ); 3581 %} 3582 3583 instruct storeV16(memory mem, vecX src) %{ 3584 predicate(n->as_StoreVector()->memory_size() == 16); 3585 match(Set mem (StoreVector mem src)); 3586 ins_cost(145); 3587 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 3588 ins_encode %{ 3589 __ movdqu($mem$$Address, $src$$XMMRegister); 3590 %} 3591 ins_pipe( pipe_slow ); 3592 %} 3593 3594 instruct storeV32(memory mem, vecY src) %{ 3595 predicate(n->as_StoreVector()->memory_size() == 32); 3596 match(Set mem (StoreVector mem src)); 3597 ins_cost(145); 3598 format %{ "vmovdqu $mem,$src\t! 
store vector (32 bytes)" %} 3599 ins_encode %{ 3600 __ vmovdqu($mem$$Address, $src$$XMMRegister); 3601 %} 3602 ins_pipe( pipe_slow ); 3603 %} 3604 3605 instruct storeV64_dword(memory mem, vecZ src) %{ 3606 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 3607 match(Set mem (StoreVector mem src)); 3608 ins_cost(145); 3609 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 3610 ins_encode %{ 3611 int vector_len = 2; 3612 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 3613 %} 3614 ins_pipe( pipe_slow ); 3615 %} 3616 3617 instruct storeV64_qword(memory mem, vecZ src) %{ 3618 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 3619 match(Set mem (StoreVector mem src)); 3620 ins_cost(145); 3621 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 3622 ins_encode %{ 3623 int vector_len = 2; 3624 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 3625 %} 3626 ins_pipe( pipe_slow ); 3627 %} 3628 3629 // ====================LEGACY REPLICATE======================================= 3630 3631 instruct Repl4B_mem(vecS dst, memory mem) %{ 3632 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3633 match(Set dst (ReplicateB (LoadB mem))); 3634 format %{ "punpcklbw $dst,$mem\n\t" 3635 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3636 ins_encode %{ 3637 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3638 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3639 %} 3640 ins_pipe( pipe_slow ); 3641 %} 3642 3643 instruct Repl8B_mem(vecD dst, memory mem) %{ 3644 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3645 match(Set dst (ReplicateB (LoadB mem))); 3646 format %{ "punpcklbw $dst,$mem\n\t" 3647 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 3648 ins_encode %{ 3649 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3650 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3651 %} 3652 ins_pipe( pipe_slow ); 3653 %} 3654 3655 instruct Repl16B(vecX dst, rRegI src) %{ 3656 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3657 match(Set dst (ReplicateB src)); 3658 format %{ "movd $dst,$src\n\t" 3659 "punpcklbw $dst,$dst\n\t" 3660 "pshuflw $dst,$dst,0x00\n\t" 3661 "punpcklqdq $dst,$dst\t! replicate16B" %} 3662 ins_encode %{ 3663 __ movdl($dst$$XMMRegister, $src$$Register); 3664 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3665 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3666 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3667 %} 3668 ins_pipe( pipe_slow ); 3669 %} 3670 3671 instruct Repl16B_mem(vecX dst, memory mem) %{ 3672 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3673 match(Set dst (ReplicateB (LoadB mem))); 3674 format %{ "punpcklbw $dst,$mem\n\t" 3675 "pshuflw $dst,$dst,0x00\n\t" 3676 "punpcklqdq $dst,$dst\t! replicate16B" %} 3677 ins_encode %{ 3678 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3679 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3680 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3681 %} 3682 ins_pipe( pipe_slow ); 3683 %} 3684 3685 instruct Repl32B(vecY dst, rRegI src) %{ 3686 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3687 match(Set dst (ReplicateB src)); 3688 format %{ "movd $dst,$src\n\t" 3689 "punpcklbw $dst,$dst\n\t" 3690 "pshuflw $dst,$dst,0x00\n\t" 3691 "punpcklqdq $dst,$dst\n\t" 3692 "vinserti128_high $dst,$dst\t! 
replicate32B" %} 3693 ins_encode %{ 3694 __ movdl($dst$$XMMRegister, $src$$Register); 3695 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3696 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3697 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3698 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3699 %} 3700 ins_pipe( pipe_slow ); 3701 %} 3702 3703 instruct Repl32B_mem(vecY dst, memory mem) %{ 3704 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3705 match(Set dst (ReplicateB (LoadB mem))); 3706 format %{ "punpcklbw $dst,$mem\n\t" 3707 "pshuflw $dst,$dst,0x00\n\t" 3708 "punpcklqdq $dst,$dst\n\t" 3709 "vinserti128_high $dst,$dst\t! replicate32B" %} 3710 ins_encode %{ 3711 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3712 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3713 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3714 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3715 %} 3716 ins_pipe( pipe_slow ); 3717 %} 3718 3719 instruct Repl16B_imm(vecX dst, immI con) %{ 3720 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3721 match(Set dst (ReplicateB con)); 3722 format %{ "movq $dst,[$constantaddress]\n\t" 3723 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3724 ins_encode %{ 3725 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3726 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3727 %} 3728 ins_pipe( pipe_slow ); 3729 %} 3730 3731 instruct Repl32B_imm(vecY dst, immI con) %{ 3732 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3733 match(Set dst (ReplicateB con)); 3734 format %{ "movq $dst,[$constantaddress]\n\t" 3735 "punpcklqdq $dst,$dst\n\t" 3736 "vinserti128_high $dst,$dst\t! 
lreplicate32B($con)" %} 3737 ins_encode %{ 3738 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3739 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3740 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3741 %} 3742 ins_pipe( pipe_slow ); 3743 %} 3744 3745 instruct Repl4S(vecD dst, rRegI src) %{ 3746 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3747 match(Set dst (ReplicateS src)); 3748 format %{ "movd $dst,$src\n\t" 3749 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3750 ins_encode %{ 3751 __ movdl($dst$$XMMRegister, $src$$Register); 3752 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3753 %} 3754 ins_pipe( pipe_slow ); 3755 %} 3756 3757 instruct Repl4S_mem(vecD dst, memory mem) %{ 3758 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3759 match(Set dst (ReplicateS (LoadS mem))); 3760 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3761 ins_encode %{ 3762 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3763 %} 3764 ins_pipe( pipe_slow ); 3765 %} 3766 3767 instruct Repl8S(vecX dst, rRegI src) %{ 3768 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3769 match(Set dst (ReplicateS src)); 3770 format %{ "movd $dst,$src\n\t" 3771 "pshuflw $dst,$dst,0x00\n\t" 3772 "punpcklqdq $dst,$dst\t! replicate8S" %} 3773 ins_encode %{ 3774 __ movdl($dst$$XMMRegister, $src$$Register); 3775 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3776 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3777 %} 3778 ins_pipe( pipe_slow ); 3779 %} 3780 3781 instruct Repl8S_mem(vecX dst, memory mem) %{ 3782 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3783 match(Set dst (ReplicateS (LoadS mem))); 3784 format %{ "pshuflw $dst,$mem,0x00\n\t" 3785 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3786 ins_encode %{ 3787 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3788 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3789 %} 3790 ins_pipe( pipe_slow ); 3791 %} 3792 3793 instruct Repl8S_imm(vecX dst, immI con) %{ 3794 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3795 match(Set dst (ReplicateS con)); 3796 format %{ "movq $dst,[$constantaddress]\n\t" 3797 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3798 ins_encode %{ 3799 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3800 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3801 %} 3802 ins_pipe( pipe_slow ); 3803 %} 3804 3805 instruct Repl16S(vecY dst, rRegI src) %{ 3806 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3807 match(Set dst (ReplicateS src)); 3808 format %{ "movd $dst,$src\n\t" 3809 "pshuflw $dst,$dst,0x00\n\t" 3810 "punpcklqdq $dst,$dst\n\t" 3811 "vinserti128_high $dst,$dst\t! replicate16S" %} 3812 ins_encode %{ 3813 __ movdl($dst$$XMMRegister, $src$$Register); 3814 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3815 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3816 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3817 %} 3818 ins_pipe( pipe_slow ); 3819 %} 3820 3821 instruct Repl16S_mem(vecY dst, memory mem) %{ 3822 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3823 match(Set dst (ReplicateS (LoadS mem))); 3824 format %{ "pshuflw $dst,$mem,0x00\n\t" 3825 "punpcklqdq $dst,$dst\n\t" 3826 "vinserti128_high $dst,$dst\t! 
replicate16S" %} 3827 ins_encode %{ 3828 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3829 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3830 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3831 %} 3832 ins_pipe( pipe_slow ); 3833 %} 3834 3835 instruct Repl16S_imm(vecY dst, immI con) %{ 3836 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3837 match(Set dst (ReplicateS con)); 3838 format %{ "movq $dst,[$constantaddress]\n\t" 3839 "punpcklqdq $dst,$dst\n\t" 3840 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3841 ins_encode %{ 3842 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3843 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3844 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3845 %} 3846 ins_pipe( pipe_slow ); 3847 %} 3848 3849 instruct Repl4I(vecX dst, rRegI src) %{ 3850 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3851 match(Set dst (ReplicateI src)); 3852 format %{ "movd $dst,$src\n\t" 3853 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3854 ins_encode %{ 3855 __ movdl($dst$$XMMRegister, $src$$Register); 3856 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3857 %} 3858 ins_pipe( pipe_slow ); 3859 %} 3860 3861 instruct Repl4I_mem(vecX dst, memory mem) %{ 3862 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3863 match(Set dst (ReplicateI (LoadI mem))); 3864 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3865 ins_encode %{ 3866 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3867 %} 3868 ins_pipe( pipe_slow ); 3869 %} 3870 3871 instruct Repl8I(vecY dst, rRegI src) %{ 3872 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3873 match(Set dst (ReplicateI src)); 3874 format %{ "movd $dst,$src\n\t" 3875 "pshufd $dst,$dst,0x00\n\t" 3876 "vinserti128_high $dst,$dst\t! 
replicate8I" %} 3877 ins_encode %{ 3878 __ movdl($dst$$XMMRegister, $src$$Register); 3879 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3880 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3881 %} 3882 ins_pipe( pipe_slow ); 3883 %} 3884 3885 instruct Repl8I_mem(vecY dst, memory mem) %{ 3886 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3887 match(Set dst (ReplicateI (LoadI mem))); 3888 format %{ "pshufd $dst,$mem,0x00\n\t" 3889 "vinserti128_high $dst,$dst\t! replicate8I" %} 3890 ins_encode %{ 3891 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3892 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3893 %} 3894 ins_pipe( pipe_slow ); 3895 %} 3896 3897 instruct Repl4I_imm(vecX dst, immI con) %{ 3898 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3899 match(Set dst (ReplicateI con)); 3900 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3901 "punpcklqdq $dst,$dst" %} 3902 ins_encode %{ 3903 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3904 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3905 %} 3906 ins_pipe( pipe_slow ); 3907 %} 3908 3909 instruct Repl8I_imm(vecY dst, immI con) %{ 3910 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3911 match(Set dst (ReplicateI con)); 3912 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3913 "punpcklqdq $dst,$dst\n\t" 3914 "vinserti128_high $dst,$dst" %} 3915 ins_encode %{ 3916 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3917 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3918 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3919 %} 3920 ins_pipe( pipe_slow ); 3921 %} 3922 3923 // Long could be loaded into xmm register directly from memory. 
3924 instruct Repl2L_mem(vecX dst, memory mem) %{ 3925 predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); 3926 match(Set dst (ReplicateL (LoadL mem))); 3927 format %{ "movq $dst,$mem\n\t" 3928 "punpcklqdq $dst,$dst\t! replicate2L" %} 3929 ins_encode %{ 3930 __ movq($dst$$XMMRegister, $mem$$Address); 3931 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3932 %} 3933 ins_pipe( pipe_slow ); 3934 %} 3935 3936 // Replicate long (8 byte) scalar to be vector 3937 #ifdef _LP64 3938 instruct Repl4L(vecY dst, rRegL src) %{ 3939 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3940 match(Set dst (ReplicateL src)); 3941 format %{ "movdq $dst,$src\n\t" 3942 "punpcklqdq $dst,$dst\n\t" 3943 "vinserti128_high $dst,$dst\t! replicate4L" %} 3944 ins_encode %{ 3945 __ movdq($dst$$XMMRegister, $src$$Register); 3946 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3947 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3948 %} 3949 ins_pipe( pipe_slow ); 3950 %} 3951 #else // _LP64 3952 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 3953 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3954 match(Set dst (ReplicateL src)); 3955 effect(TEMP dst, USE src, TEMP tmp); 3956 format %{ "movdl $dst,$src.lo\n\t" 3957 "movdl $tmp,$src.hi\n\t" 3958 "punpckldq $dst,$tmp\n\t" 3959 "punpcklqdq $dst,$dst\n\t" 3960 "vinserti128_high $dst,$dst\t! 
replicate4L" %} 3961 ins_encode %{ 3962 __ movdl($dst$$XMMRegister, $src$$Register); 3963 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3964 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3965 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3966 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3967 %} 3968 ins_pipe( pipe_slow ); 3969 %} 3970 #endif // _LP64 3971 3972 instruct Repl4L_imm(vecY dst, immL con) %{ 3973 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3974 match(Set dst (ReplicateL con)); 3975 format %{ "movq $dst,[$constantaddress]\n\t" 3976 "punpcklqdq $dst,$dst\n\t" 3977 "vinserti128_high $dst,$dst\t! replicate4L($con)" %} 3978 ins_encode %{ 3979 __ movq($dst$$XMMRegister, $constantaddress($con)); 3980 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3981 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3982 %} 3983 ins_pipe( pipe_slow ); 3984 %} 3985 3986 instruct Repl4L_mem(vecY dst, memory mem) %{ 3987 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3988 match(Set dst (ReplicateL (LoadL mem))); 3989 format %{ "movq $dst,$mem\n\t" 3990 "punpcklqdq $dst,$dst\n\t" 3991 "vinserti128_high $dst,$dst\t! replicate4L" %} 3992 ins_encode %{ 3993 __ movq($dst$$XMMRegister, $mem$$Address); 3994 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3995 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3996 %} 3997 ins_pipe( pipe_slow ); 3998 %} 3999 4000 instruct Repl2F_mem(vecD dst, memory mem) %{ 4001 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 4002 match(Set dst (ReplicateF (LoadF mem))); 4003 format %{ "pshufd $dst,$mem,0x00\t! 
replicate2F" %} 4004 ins_encode %{ 4005 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 4006 %} 4007 ins_pipe( pipe_slow ); 4008 %} 4009 4010 instruct Repl4F_mem(vecX dst, memory mem) %{ 4011 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 4012 match(Set dst (ReplicateF (LoadF mem))); 4013 format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} 4014 ins_encode %{ 4015 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 4016 %} 4017 ins_pipe( pipe_slow ); 4018 %} 4019 4020 instruct Repl8F(vecY dst, regF src) %{ 4021 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 4022 match(Set dst (ReplicateF src)); 4023 format %{ "pshufd $dst,$src,0x00\n\t" 4024 "vinsertf128_high $dst,$dst\t! replicate8F" %} 4025 ins_encode %{ 4026 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4027 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4028 %} 4029 ins_pipe( pipe_slow ); 4030 %} 4031 4032 instruct Repl8F_mem(vecY dst, memory mem) %{ 4033 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 4034 match(Set dst (ReplicateF (LoadF mem))); 4035 format %{ "pshufd $dst,$mem,0x00\n\t" 4036 "vinsertf128_high $dst,$dst\t! replicate8F" %} 4037 ins_encode %{ 4038 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 4039 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4040 %} 4041 ins_pipe( pipe_slow ); 4042 %} 4043 4044 instruct Repl2F_zero(vecD dst, immF0 zero) %{ 4045 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 4046 match(Set dst (ReplicateF zero)); 4047 format %{ "xorps $dst,$dst\t! replicate2F zero" %} 4048 ins_encode %{ 4049 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4050 %} 4051 ins_pipe( fpu_reg_reg ); 4052 %} 4053 4054 instruct Repl4F_zero(vecX dst, immF0 zero) %{ 4055 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 4056 match(Set dst (ReplicateF zero)); 4057 format %{ "xorps $dst,$dst\t! 
replicate4F zero" %} 4058 ins_encode %{ 4059 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4060 %} 4061 ins_pipe( fpu_reg_reg ); 4062 %} 4063 4064 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 4065 predicate(n->as_Vector()->length() == 8 && UseAVX < 3); 4066 match(Set dst (ReplicateF zero)); 4067 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 4068 ins_encode %{ 4069 int vector_len = 1; 4070 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4071 %} 4072 ins_pipe( fpu_reg_reg ); 4073 %} 4074 4075 instruct Repl2D_mem(vecX dst, memory mem) %{ 4076 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 4077 match(Set dst (ReplicateD (LoadD mem))); 4078 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 4079 ins_encode %{ 4080 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 4081 %} 4082 ins_pipe( pipe_slow ); 4083 %} 4084 4085 instruct Repl4D(vecY dst, regD src) %{ 4086 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 4087 match(Set dst (ReplicateD src)); 4088 format %{ "pshufd $dst,$src,0x44\n\t" 4089 "vinsertf128_high $dst,$dst\t! replicate4D" %} 4090 ins_encode %{ 4091 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4092 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4093 %} 4094 ins_pipe( pipe_slow ); 4095 %} 4096 4097 instruct Repl4D_mem(vecY dst, memory mem) %{ 4098 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 4099 match(Set dst (ReplicateD (LoadD mem))); 4100 format %{ "pshufd $dst,$mem,0x44\n\t" 4101 "vinsertf128_high $dst,$dst\t! 
replicate4D" %} 4102 ins_encode %{ 4103 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 4104 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4105 %} 4106 ins_pipe( pipe_slow ); 4107 %} 4108 4109 // Replicate double (8 byte) scalar zero to be vector 4110 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 4111 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 4112 match(Set dst (ReplicateD zero)); 4113 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 4114 ins_encode %{ 4115 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 4116 %} 4117 ins_pipe( fpu_reg_reg ); 4118 %} 4119 4120 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 4121 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 4122 match(Set dst (ReplicateD zero)); 4123 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 4124 ins_encode %{ 4125 int vector_len = 1; 4126 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4127 %} 4128 ins_pipe( fpu_reg_reg ); 4129 %} 4130 4131 // ====================GENERIC REPLICATE========================================== 4132 4133 // Replicate byte scalar to be vector 4134 instruct Repl4B(vecS dst, rRegI src) %{ 4135 predicate(n->as_Vector()->length() == 4); 4136 match(Set dst (ReplicateB src)); 4137 format %{ "movd $dst,$src\n\t" 4138 "punpcklbw $dst,$dst\n\t" 4139 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 4140 ins_encode %{ 4141 __ movdl($dst$$XMMRegister, $src$$Register); 4142 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4143 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4144 %} 4145 ins_pipe( pipe_slow ); 4146 %} 4147 4148 instruct Repl8B(vecD dst, rRegI src) %{ 4149 predicate(n->as_Vector()->length() == 8); 4150 match(Set dst (ReplicateB src)); 4151 format %{ "movd $dst,$src\n\t" 4152 "punpcklbw $dst,$dst\n\t" 4153 "pshuflw $dst,$dst,0x00\t! 
replicate8B" %} 4154 ins_encode %{ 4155 __ movdl($dst$$XMMRegister, $src$$Register); 4156 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4157 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4158 %} 4159 ins_pipe( pipe_slow ); 4160 %} 4161 4162 // Replicate byte scalar immediate to be vector by loading from const table. 4163 instruct Repl4B_imm(vecS dst, immI con) %{ 4164 predicate(n->as_Vector()->length() == 4); 4165 match(Set dst (ReplicateB con)); 4166 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 4167 ins_encode %{ 4168 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 4169 %} 4170 ins_pipe( pipe_slow ); 4171 %} 4172 4173 instruct Repl8B_imm(vecD dst, immI con) %{ 4174 predicate(n->as_Vector()->length() == 8); 4175 match(Set dst (ReplicateB con)); 4176 format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %} 4177 ins_encode %{ 4178 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4179 %} 4180 ins_pipe( pipe_slow ); 4181 %} 4182 4183 // Replicate byte scalar zero to be vector 4184 instruct Repl4B_zero(vecS dst, immI0 zero) %{ 4185 predicate(n->as_Vector()->length() == 4); 4186 match(Set dst (ReplicateB zero)); 4187 format %{ "pxor $dst,$dst\t! replicate4B zero" %} 4188 ins_encode %{ 4189 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4190 %} 4191 ins_pipe( fpu_reg_reg ); 4192 %} 4193 4194 instruct Repl8B_zero(vecD dst, immI0 zero) %{ 4195 predicate(n->as_Vector()->length() == 8); 4196 match(Set dst (ReplicateB zero)); 4197 format %{ "pxor $dst,$dst\t! replicate8B zero" %} 4198 ins_encode %{ 4199 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4200 %} 4201 ins_pipe( fpu_reg_reg ); 4202 %} 4203 4204 instruct Repl16B_zero(vecX dst, immI0 zero) %{ 4205 predicate(n->as_Vector()->length() == 16); 4206 match(Set dst (ReplicateB zero)); 4207 format %{ "pxor $dst,$dst\t! 
replicate16B zero" %} 4208 ins_encode %{ 4209 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4210 %} 4211 ins_pipe( fpu_reg_reg ); 4212 %} 4213 4214 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 4215 predicate(n->as_Vector()->length() == 32); 4216 match(Set dst (ReplicateB zero)); 4217 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 4218 ins_encode %{ 4219 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4220 int vector_len = 1; 4221 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4222 %} 4223 ins_pipe( fpu_reg_reg ); 4224 %} 4225 4226 // Replicate char/short (2 byte) scalar to be vector 4227 instruct Repl2S(vecS dst, rRegI src) %{ 4228 predicate(n->as_Vector()->length() == 2); 4229 match(Set dst (ReplicateS src)); 4230 format %{ "movd $dst,$src\n\t" 4231 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 4232 ins_encode %{ 4233 __ movdl($dst$$XMMRegister, $src$$Register); 4234 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4235 %} 4236 ins_pipe( fpu_reg_reg ); 4237 %} 4238 4239 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 4240 instruct Repl2S_imm(vecS dst, immI con) %{ 4241 predicate(n->as_Vector()->length() == 2); 4242 match(Set dst (ReplicateS con)); 4243 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 4244 ins_encode %{ 4245 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 4246 %} 4247 ins_pipe( fpu_reg_reg ); 4248 %} 4249 4250 instruct Repl4S_imm(vecD dst, immI con) %{ 4251 predicate(n->as_Vector()->length() == 4); 4252 match(Set dst (ReplicateS con)); 4253 format %{ "movq $dst,[$constantaddress]\t! 
replicate4S($con)" %} 4254 ins_encode %{ 4255 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4256 %} 4257 ins_pipe( fpu_reg_reg ); 4258 %} 4259 4260 // Replicate char/short (2 byte) scalar zero to be vector 4261 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 4262 predicate(n->as_Vector()->length() == 2); 4263 match(Set dst (ReplicateS zero)); 4264 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 4265 ins_encode %{ 4266 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4267 %} 4268 ins_pipe( fpu_reg_reg ); 4269 %} 4270 4271 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 4272 predicate(n->as_Vector()->length() == 4); 4273 match(Set dst (ReplicateS zero)); 4274 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 4275 ins_encode %{ 4276 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4277 %} 4278 ins_pipe( fpu_reg_reg ); 4279 %} 4280 4281 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 4282 predicate(n->as_Vector()->length() == 8); 4283 match(Set dst (ReplicateS zero)); 4284 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 4285 ins_encode %{ 4286 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4287 %} 4288 ins_pipe( fpu_reg_reg ); 4289 %} 4290 4291 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 4292 predicate(n->as_Vector()->length() == 16); 4293 match(Set dst (ReplicateS zero)); 4294 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 4295 ins_encode %{ 4296 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4297 int vector_len = 1; 4298 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4299 %} 4300 ins_pipe( fpu_reg_reg ); 4301 %} 4302 4303 // Replicate integer (4 byte) scalar to be vector 4304 instruct Repl2I(vecD dst, rRegI src) %{ 4305 predicate(n->as_Vector()->length() == 2); 4306 match(Set dst (ReplicateI src)); 4307 format %{ "movd $dst,$src\n\t" 4308 "pshufd $dst,$dst,0x00\t! 
replicate2I" %} 4309 ins_encode %{ 4310 __ movdl($dst$$XMMRegister, $src$$Register); 4311 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4312 %} 4313 ins_pipe( fpu_reg_reg ); 4314 %} 4315 4316 // Integer could be loaded into xmm register directly from memory. 4317 instruct Repl2I_mem(vecD dst, memory mem) %{ 4318 predicate(n->as_Vector()->length() == 2); 4319 match(Set dst (ReplicateI (LoadI mem))); 4320 format %{ "movd $dst,$mem\n\t" 4321 "pshufd $dst,$dst,0x00\t! replicate2I" %} 4322 ins_encode %{ 4323 __ movdl($dst$$XMMRegister, $mem$$Address); 4324 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4325 %} 4326 ins_pipe( fpu_reg_reg ); 4327 %} 4328 4329 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 4330 instruct Repl2I_imm(vecD dst, immI con) %{ 4331 predicate(n->as_Vector()->length() == 2); 4332 match(Set dst (ReplicateI con)); 4333 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 4334 ins_encode %{ 4335 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4336 %} 4337 ins_pipe( fpu_reg_reg ); 4338 %} 4339 4340 // Replicate integer (4 byte) scalar zero to be vector 4341 instruct Repl2I_zero(vecD dst, immI0 zero) %{ 4342 predicate(n->as_Vector()->length() == 2); 4343 match(Set dst (ReplicateI zero)); 4344 format %{ "pxor $dst,$dst\t! replicate2I" %} 4345 ins_encode %{ 4346 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4347 %} 4348 ins_pipe( fpu_reg_reg ); 4349 %} 4350 4351 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 4352 predicate(n->as_Vector()->length() == 4); 4353 match(Set dst (ReplicateI zero)); 4354 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 4355 ins_encode %{ 4356 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4357 %} 4358 ins_pipe( fpu_reg_reg ); 4359 %} 4360 4361 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 4362 predicate(n->as_Vector()->length() == 8); 4363 match(Set dst (ReplicateI zero)); 4364 format %{ "vpxor $dst,$dst,$dst\t! 
replicate8I zero" %} 4365 ins_encode %{ 4366 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4367 int vector_len = 1; 4368 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4369 %} 4370 ins_pipe( fpu_reg_reg ); 4371 %} 4372 4373 // Replicate long (8 byte) scalar to be vector 4374 #ifdef _LP64 4375 instruct Repl2L(vecX dst, rRegL src) %{ 4376 predicate(n->as_Vector()->length() == 2); 4377 match(Set dst (ReplicateL src)); 4378 format %{ "movdq $dst,$src\n\t" 4379 "punpcklqdq $dst,$dst\t! replicate2L" %} 4380 ins_encode %{ 4381 __ movdq($dst$$XMMRegister, $src$$Register); 4382 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4383 %} 4384 ins_pipe( pipe_slow ); 4385 %} 4386 #else // _LP64 4387 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 4388 predicate(n->as_Vector()->length() == 2); 4389 match(Set dst (ReplicateL src)); 4390 effect(TEMP dst, USE src, TEMP tmp); 4391 format %{ "movdl $dst,$src.lo\n\t" 4392 "movdl $tmp,$src.hi\n\t" 4393 "punpckldq $dst,$tmp\n\t" 4394 "punpcklqdq $dst,$dst\t! replicate2L"%} 4395 ins_encode %{ 4396 __ movdl($dst$$XMMRegister, $src$$Register); 4397 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4398 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4399 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4400 %} 4401 ins_pipe( pipe_slow ); 4402 %} 4403 #endif // _LP64 4404 4405 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4406 instruct Repl2L_imm(vecX dst, immL con) %{ 4407 predicate(n->as_Vector()->length() == 2); 4408 match(Set dst (ReplicateL con)); 4409 format %{ "movq $dst,[$constantaddress]\n\t" 4410 "punpcklqdq $dst,$dst\t! 
replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  // Format string fixed: the encoding shuffles from $src, not $dst,
  // so the printed disassembly now matches the emitted instruction.
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  // Format string fixed: the encoding shuffles from $src, not $dst.
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! 
replicate2D" %} 4467 ins_encode %{ 4468 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4469 %} 4470 ins_pipe( pipe_slow ); 4471 %} 4472 4473 // ====================EVEX REPLICATE============================================= 4474 4475 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4476 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4477 match(Set dst (ReplicateB (LoadB mem))); 4478 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4479 ins_encode %{ 4480 int vector_len = 0; 4481 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4482 %} 4483 ins_pipe( pipe_slow ); 4484 %} 4485 4486 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4487 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4488 match(Set dst (ReplicateB (LoadB mem))); 4489 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 4490 ins_encode %{ 4491 int vector_len = 0; 4492 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4493 %} 4494 ins_pipe( pipe_slow ); 4495 %} 4496 4497 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4498 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4499 match(Set dst (ReplicateB src)); 4500 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 4501 ins_encode %{ 4502 int vector_len = 0; 4503 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4504 %} 4505 ins_pipe( pipe_slow ); 4506 %} 4507 4508 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4509 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4510 match(Set dst (ReplicateB (LoadB mem))); 4511 format %{ "vpbroadcastb $dst,$mem\t! 
replicate16B" %} 4512 ins_encode %{ 4513 int vector_len = 0; 4514 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4515 %} 4516 ins_pipe( pipe_slow ); 4517 %} 4518 4519 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4520 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4521 match(Set dst (ReplicateB src)); 4522 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4523 ins_encode %{ 4524 int vector_len = 1; 4525 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4526 %} 4527 ins_pipe( pipe_slow ); 4528 %} 4529 4530 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4531 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4532 match(Set dst (ReplicateB (LoadB mem))); 4533 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4534 ins_encode %{ 4535 int vector_len = 1; 4536 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4537 %} 4538 ins_pipe( pipe_slow ); 4539 %} 4540 4541 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4542 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4543 match(Set dst (ReplicateB src)); 4544 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 4545 ins_encode %{ 4546 int vector_len = 2; 4547 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4548 %} 4549 ins_pipe( pipe_slow ); 4550 %} 4551 4552 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4553 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4554 match(Set dst (ReplicateB (LoadB mem))); 4555 format %{ "vpbroadcastb $dst,$mem\t! 
replicate64B" %} 4556 ins_encode %{ 4557 int vector_len = 2; 4558 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4559 %} 4560 ins_pipe( pipe_slow ); 4561 %} 4562 4563 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4564 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4565 match(Set dst (ReplicateB con)); 4566 format %{ "movq $dst,[$constantaddress]\n\t" 4567 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4568 ins_encode %{ 4569 int vector_len = 0; 4570 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4571 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4572 %} 4573 ins_pipe( pipe_slow ); 4574 %} 4575 4576 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4577 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4578 match(Set dst (ReplicateB con)); 4579 format %{ "movq $dst,[$constantaddress]\n\t" 4580 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4581 ins_encode %{ 4582 int vector_len = 1; 4583 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4584 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4585 %} 4586 ins_pipe( pipe_slow ); 4587 %} 4588 4589 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4590 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4591 match(Set dst (ReplicateB con)); 4592 format %{ "movq $dst,[$constantaddress]\n\t" 4593 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4594 ins_encode %{ 4595 int vector_len = 2; 4596 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4597 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4598 %} 4599 ins_pipe( pipe_slow ); 4600 %} 4601 4602 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4603 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4604 match(Set dst (ReplicateB zero)); 4605 format %{ "vpxor $dst k0,$dst,$dst\t! 
replicate64B zero" %} 4606 ins_encode %{ 4607 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4608 int vector_len = 2; 4609 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4610 %} 4611 ins_pipe( fpu_reg_reg ); 4612 %} 4613 4614 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4615 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4616 match(Set dst (ReplicateS src)); 4617 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 4618 ins_encode %{ 4619 int vector_len = 0; 4620 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4621 %} 4622 ins_pipe( pipe_slow ); 4623 %} 4624 4625 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4626 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4627 match(Set dst (ReplicateS (LoadS mem))); 4628 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4629 ins_encode %{ 4630 int vector_len = 0; 4631 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4632 %} 4633 ins_pipe( pipe_slow ); 4634 %} 4635 4636 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4637 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4638 match(Set dst (ReplicateS src)); 4639 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4640 ins_encode %{ 4641 int vector_len = 0; 4642 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4643 %} 4644 ins_pipe( pipe_slow ); 4645 %} 4646 4647 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4648 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4649 match(Set dst (ReplicateS (LoadS mem))); 4650 format %{ "vpbroadcastw $dst,$mem\t! 
replicate8S" %} 4651 ins_encode %{ 4652 int vector_len = 0; 4653 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4654 %} 4655 ins_pipe( pipe_slow ); 4656 %} 4657 4658 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4659 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4660 match(Set dst (ReplicateS src)); 4661 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4662 ins_encode %{ 4663 int vector_len = 1; 4664 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4665 %} 4666 ins_pipe( pipe_slow ); 4667 %} 4668 4669 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4670 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4671 match(Set dst (ReplicateS (LoadS mem))); 4672 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4673 ins_encode %{ 4674 int vector_len = 1; 4675 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4676 %} 4677 ins_pipe( pipe_slow ); 4678 %} 4679 4680 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4681 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4682 match(Set dst (ReplicateS src)); 4683 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4684 ins_encode %{ 4685 int vector_len = 2; 4686 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4687 %} 4688 ins_pipe( pipe_slow ); 4689 %} 4690 4691 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4692 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4693 match(Set dst (ReplicateS (LoadS mem))); 4694 format %{ "vpbroadcastw $dst,$mem\t! 
replicate32S" %} 4695 ins_encode %{ 4696 int vector_len = 2; 4697 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4698 %} 4699 ins_pipe( pipe_slow ); 4700 %} 4701 4702 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4703 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4704 match(Set dst (ReplicateS con)); 4705 format %{ "movq $dst,[$constantaddress]\n\t" 4706 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4707 ins_encode %{ 4708 int vector_len = 0; 4709 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4710 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4711 %} 4712 ins_pipe( pipe_slow ); 4713 %} 4714 4715 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4716 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4717 match(Set dst (ReplicateS con)); 4718 format %{ "movq $dst,[$constantaddress]\n\t" 4719 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4720 ins_encode %{ 4721 int vector_len = 1; 4722 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4723 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4724 %} 4725 ins_pipe( pipe_slow ); 4726 %} 4727 4728 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4729 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4730 match(Set dst (ReplicateS con)); 4731 format %{ "movq $dst,[$constantaddress]\n\t" 4732 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4733 ins_encode %{ 4734 int vector_len = 2; 4735 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4736 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4737 %} 4738 ins_pipe( pipe_slow ); 4739 %} 4740 4741 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4742 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4743 match(Set dst (ReplicateS zero)); 4744 format %{ "vpxor $dst k0,$dst,$dst\t! 
replicate32S zero" %} 4745 ins_encode %{ 4746 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4747 int vector_len = 2; 4748 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4749 %} 4750 ins_pipe( fpu_reg_reg ); 4751 %} 4752 4753 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4754 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4755 match(Set dst (ReplicateI src)); 4756 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 4757 ins_encode %{ 4758 int vector_len = 0; 4759 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4760 %} 4761 ins_pipe( pipe_slow ); 4762 %} 4763 4764 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4765 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4766 match(Set dst (ReplicateI (LoadI mem))); 4767 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4768 ins_encode %{ 4769 int vector_len = 0; 4770 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4771 %} 4772 ins_pipe( pipe_slow ); 4773 %} 4774 4775 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4776 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4777 match(Set dst (ReplicateI src)); 4778 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4779 ins_encode %{ 4780 int vector_len = 1; 4781 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4782 %} 4783 ins_pipe( pipe_slow ); 4784 %} 4785 4786 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4787 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4788 match(Set dst (ReplicateI (LoadI mem))); 4789 format %{ "vpbroadcastd $dst,$mem\t! 
replicate8I" %} 4790 ins_encode %{ 4791 int vector_len = 1; 4792 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4793 %} 4794 ins_pipe( pipe_slow ); 4795 %} 4796 4797 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4798 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4799 match(Set dst (ReplicateI src)); 4800 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4801 ins_encode %{ 4802 int vector_len = 2; 4803 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4804 %} 4805 ins_pipe( pipe_slow ); 4806 %} 4807 4808 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4809 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4810 match(Set dst (ReplicateI (LoadI mem))); 4811 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4812 ins_encode %{ 4813 int vector_len = 2; 4814 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4815 %} 4816 ins_pipe( pipe_slow ); 4817 %} 4818 4819 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4820 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4821 match(Set dst (ReplicateI con)); 4822 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4823 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4824 ins_encode %{ 4825 int vector_len = 0; 4826 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4827 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4828 %} 4829 ins_pipe( pipe_slow ); 4830 %} 4831 4832 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4833 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4834 match(Set dst (ReplicateI con)); 4835 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4836 "vpbroadcastd $dst,$dst\t! 
replicate8I" %} 4837 ins_encode %{ 4838 int vector_len = 1; 4839 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4840 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4841 %} 4842 ins_pipe( pipe_slow ); 4843 %} 4844 4845 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4846 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4847 match(Set dst (ReplicateI con)); 4848 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4849 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4850 ins_encode %{ 4851 int vector_len = 2; 4852 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4853 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4854 %} 4855 ins_pipe( pipe_slow ); 4856 %} 4857 4858 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4859 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4860 match(Set dst (ReplicateI zero)); 4861 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4862 ins_encode %{ 4863 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4864 int vector_len = 2; 4865 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4866 %} 4867 ins_pipe( fpu_reg_reg ); 4868 %} 4869 4870 // Replicate long (8 byte) scalar to be vector 4871 #ifdef _LP64 4872 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4873 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4874 match(Set dst (ReplicateL src)); 4875 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4876 ins_encode %{ 4877 int vector_len = 1; 4878 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4879 %} 4880 ins_pipe( pipe_slow ); 4881 %} 4882 4883 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4884 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4885 match(Set dst (ReplicateL src)); 4886 format %{ "vpbroadcastq $dst,$src\t! 
replicate8L" %} 4887 ins_encode %{ 4888 int vector_len = 2; 4889 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4890 %} 4891 ins_pipe( pipe_slow ); 4892 %} 4893 #else // _LP64 4894 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4895 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4896 match(Set dst (ReplicateL src)); 4897 effect(TEMP dst, USE src, TEMP tmp); 4898 format %{ "movdl $dst,$src.lo\n\t" 4899 "movdl $tmp,$src.hi\n\t" 4900 "punpckldq $dst,$tmp\n\t" 4901 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4902 ins_encode %{ 4903 int vector_len = 1; 4904 __ movdl($dst$$XMMRegister, $src$$Register); 4905 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4906 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4907 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4908 %} 4909 ins_pipe( pipe_slow ); 4910 %} 4911 4912 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4913 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4914 match(Set dst (ReplicateL src)); 4915 effect(TEMP dst, USE src, TEMP tmp); 4916 format %{ "movdl $dst,$src.lo\n\t" 4917 "movdl $tmp,$src.hi\n\t" 4918 "punpckldq $dst,$tmp\n\t" 4919 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4920 ins_encode %{ 4921 int vector_len = 2; 4922 __ movdl($dst$$XMMRegister, $src$$Register); 4923 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4924 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4925 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4926 %} 4927 ins_pipe( pipe_slow ); 4928 %} 4929 #endif // _LP64 4930 4931 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4932 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4933 match(Set dst (ReplicateL con)); 4934 format %{ "movq $dst,[$constantaddress]\n\t" 4935 "vpbroadcastq $dst,$dst\t! 
replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Broadcast a 64-bit value straight from memory.
// The format strings below say vpbroadcastq to match the emitted
// evpbroadcastq (quadword); they previously read "vpbroadcastd",
// which mislabeled the debug/PrintOptoAssembly output.
instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Zero all 8 long lanes of a 512-bit register.
instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate a float scalar into all 8 lanes of a 256-bit register.
instruct Repl8F_evex(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate8F" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_evex(vecZ dst, regF src) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF src));
  format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "vbroadcastss $dst,$mem\t! replicate16F" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Float zeroing variants: xor the destination with itself. Note the xor is
// always emitted at 512-bit width (vector_len = 2), even for the narrower
// vecD/vecX/vecY patterns; see the comment inside each encode block.
instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
  match(Set dst (ReplicateF zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
  ins_encode %{
    // Use vpxor in place of vxorps since EVEX has a constraint on dq for vxorps: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate a double scalar into all 4 lanes of a 256-bit register.
instruct Repl4D_evex(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_evex(vecZ dst, regD src) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD src));
  format %{ "vbroadcastsd $dst,$src\t! 
replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Double zeroing variants: xor the destination with itself at 512-bit width.
instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateD zero));
  format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

// Insert a scalar into one lane of a vector. The non-destructive SSE/AVX
// insert instructions require dst == src contents first, hence the leading
// conditional copy in each encode block.
instruct rvinsert8B(vecD dst, vecD src, rRegI val, immU3 idx) %{
  predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrb $dst,$val\t! Insert 8B" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrb($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert16B(vecX dst, vecX src, rRegI val, immU4 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrb $dst,$val\t! Insert 16B" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrb($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert16B_avx(vecX dst, vecX src, rRegI val, immU4 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "vmovdqu $dst,$src\n\t"
            "vpinsrb $dst,$dst,$val\t! 
Insert 16B" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vpinsrb($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit byte insert: extract the 128-bit half holding the lane, insert
// within it, then re-insert the half. x_idx = lane within the half,
// y_idx = which half.
instruct rvinsert32B(vecY dst, vecY src, vecY tmp, rRegI val, immU5 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  // Fixed: vinserti128 format line previously dropped the middle operand
  // ("$dst,$tmp") relative to the three-operand instruction actually emitted.
  format %{"vmovdqu $dst,$src\n\t"
           "vextracti128 $tmp,$src\n\t"
           "vpinsrb $tmp,$tmp,$val\n\t"
           "vinserti128 $dst,$dst,$tmp\t! Insert 32B" %}
  ins_encode %{
    uint x_idx = $idx$$constant & right_n_bits(4);
    uint y_idx = ($idx$$constant >> 4) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrb($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit byte insert: two-level extract (256-bit half, then 128-bit quarter).
instruct rvinsert64B(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU6 idx) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1);
  format %{ "evmovdquq $dst,$src\n\t"
            "vextracti64x4 $tmp,$src\n\t"
            "vextracti128 $tmp1,$tmp\n\t"
            "vpinsrb $tmp1,$tmp1,$val\n\t"
            "vinserti128 $tmp,$tmp,$tmp1\n\t"
            "vinserti64x4 $dst,$dst,$tmp\t! Insert 64B" %}
  ins_encode %{
    uint x_idx = $idx$$constant & right_n_bits(4);
    uint y_idx = ($idx$$constant >> 4) & 1;
    uint z_idx = ($idx$$constant >> 5) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2);
    }
    __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx);
    __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx);
    __ vpinsrb($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert4S(vecD dst, vecD src, rRegI val, immU2 idx) %{
  predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrw $dst,$val\t! Insert 4S" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrw($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert8S(vecX dst, vecX src, rRegI val, immU3 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrw $dst,$val\t! 
Insert 8S" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrw($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert8S_avx(vecX dst, vecX src, rRegI val, immU3 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "vmovdqu $dst,$src\n\t"
            "vpinsrw $dst,$dst,$val\t! Insert 8S" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vpinsrw($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}


// 256-bit short insert: extract the 128-bit half holding the lane, insert
// within it, then re-insert the half. x_idx = lane within half (3 bits),
// y_idx = which half.
instruct rvinsert16S(vecY dst, vecY src, vecX tmp, rRegI val, immU4 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  format %{ "vmovdqu $dst,$src\n\t"
            "vextracti128 $tmp,$src\n\t"
            "vpinsrw $tmp,$tmp,$val\n\t"
            "vinserti128 $dst,$dst,$tmp\t! Insert 16S" %}
  ins_encode %{
    uint x_idx = $idx$$constant & right_n_bits(3);
    uint y_idx = ($idx$$constant >> 3) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrw($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit short insert: two-level extract (256-bit half, then 128-bit quarter).
instruct rvinsert32S(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU5 idx) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1);
  format %{ "evmovdquq $dst,$src\n\t"
            "vextracti64x4 $tmp,$src\n\t"
            "vextracti128 $tmp1,$tmp\n\t"
            "vpinsrw $tmp1,$tmp1,$val\n\t"
            "vinserti128 $tmp,$tmp,$tmp1\n\t"
            "vinserti64x4 $dst,$dst,$tmp\t! 
Insert 32S" %}
  ins_encode %{
    uint x_idx = $idx$$constant & right_n_bits(3);
    uint y_idx = ($idx$$constant >> 3) & 1;
    uint z_idx = ($idx$$constant >> 4) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2);
    }
    __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx);
    __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx);
    __ vpinsrw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert2I(vecD dst, vecD src, rRegI val, immU1 idx) %{
  predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrd $dst,$val\t! Insert 2I" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrd($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert4I(vecX dst, vecX src, rRegI val, immU2 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrd $dst,$val\t! Insert 4I" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrd($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert4I_avx(vecX dst, vecX src, rRegI val, immU2 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  // Fixed: vpinsrd format line previously read "$dst,$val", omitting the
  // second source operand of the three-operand AVX instruction emitted below.
  format %{ "vmovdqu $dst,$src\n\t"
            "vpinsrd $dst,$dst,$val\t! Insert 4I" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vpinsrd($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit int insert: extract half, insert lane, re-insert half.
instruct rvinsert8I(vecY dst, vecY src, vecY tmp, rRegI val, immU3 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  format %{ "vmovdqu $dst,$src\n\t"
            "vextracti128 $tmp,$src\n\t"
            "vpinsrd $tmp,$tmp,$val\n\t"
            "vinserti128 $dst,$dst,$tmp\t! 
Insert 8I" %}
  ins_encode %{
    uint x_idx = $idx$$constant & right_n_bits(2);
    uint y_idx = ($idx$$constant >> 2) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrd($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit int insert: two-level extract (256-bit half, then 128-bit quarter).
// Fixed: the format previously named $tmp in every line although the encode
// uses $tmp1 for the 128-bit quarter, making the debug output unreadable.
instruct rvinsert16I(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU4 idx) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1);
  format %{ "evmovdquq $dst,$src\n\t"
            "vextracti64x4 $tmp,$src\n\t"
            "vextracti128 $tmp1,$tmp\n\t"
            "vpinsrd $tmp1,$tmp1,$val\n\t"
            "vinserti128 $tmp,$tmp,$tmp1\n\t"
            "vinserti64x4 $dst,$dst,$tmp\t! Insert 16I" %}
  ins_encode %{
    uint x_idx = $idx$$constant & right_n_bits(2);
    uint y_idx = ($idx$$constant >> 2) & 1;
    uint z_idx = ($idx$$constant >> 3) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2);
    }
    __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx);
    __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx);
    __ vpinsrd($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx);
  %}
  ins_pipe( pipe_slow );
%}

// Single-lane long vector: idx is constrained to 0 (immI0), so the insert
// index is hard-coded.
instruct rvinsert1L(vecD dst, vecD src, rRegL val, immI0 idx) %{
  predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrq $dst,$val\t! Insert 1L" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrq($dst$$XMMRegister, $val$$Register, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert2L(vecX dst, vecX src, rRegL val, immU1 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  // Fixed: pinsrq format line previously read "$dst,$dst"; the encode
  // inserts $val.
  format %{ "movdqu $dst,$src\n\t"
            "pinsrq $dst,$val\t! 
Insert 2L" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert2L_avx(vecX dst, vecX src, rRegL val, immU1 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "vmovdqu $dst,$src\n\t"
            "vpinsrq $dst,$dst,$val\t! Insert 2L" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vpinsrq($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit long insert: extract half, insert lane, re-insert half.
instruct rvinsert4L(vecY dst, vecY src, vecY tmp, rRegL val, immU2 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  format %{ "vmovdqu $dst,$src\n\t"
            "vextracti128 $tmp,$src\n\t"
            "vpinsrq $tmp,$tmp,$val\n\t"
            "vinserti128 $dst,$dst,$tmp\t! Insert 4L" %}
  ins_encode %{
    uint x_idx = $idx$$constant & 1;
    uint y_idx = ($idx$$constant >> 1) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit long insert: two-level extract (256-bit half, then 128-bit quarter).
// Fixed: the format previously named $tmp in every line although the encode
// uses $tmp1 for the 128-bit quarter (same mislabeling as rvinsert16I).
instruct rvinsert8L(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegL val, immU3 idx) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1);
  format %{ "evmovdquq $dst,$src\n\t"
            "vextracti64x4 $tmp,$src\n\t"
            "vextracti128 $tmp1,$tmp\n\t"
            "vpinsrq $tmp1,$tmp1,$val\n\t"
            "vinserti128 $tmp,$tmp,$tmp1\n\t"
            "vinserti64x4 $dst,$dst,$tmp\t! 
Insert 8L" %}
  ins_encode %{
    uint x_idx = $idx$$constant & 1;
    uint y_idx = ($idx$$constant >> 1) & 1;
    uint z_idx = ($idx$$constant >> 2) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2);
    }
    __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx);
    __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx);
    __ vpinsrq($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx);
  %}
  ins_pipe( pipe_slow );
%}

// Float inserts use insertps, whose immediate selects the destination lane.
instruct rvinsert2F(vecD dst, vecD src, regF val, immU1 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  // Fixed: insertps format line previously showed three register operands;
  // the SSE form emitted below is two-operand (dst, val) plus immediate.
  format %{ "movdqu $dst,$src\n\t"
            "insertps $dst,$val\t! Insert 2F" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert2F_avx(vecD dst, vecD src, regF val, immU1 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  // Fixed: format previously said "insertps"; the encode emits vinsertps.
  format %{ "movdqu $dst,$src\n\t"
            "vinsertps $dst,$dst,$val\t! Insert 2F" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vinsertps($dst$$XMMRegister, $dst$$XMMRegister, $val$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert4F(vecX dst, vecX src, regF val, immU2 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  // Fixed: insertps format line previously showed three register operands;
  // the SSE form emitted below is two-operand (dst, val) plus immediate.
  format %{ "movdqu $dst,$src\n\t"
            "insertps $dst,$val\t! Insert 4F" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert4F_avx(vecX dst, vecX src, regF val, immU2 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "vmovdqu $dst,$src\n\t"
            "vinsertps $dst,$dst,$val\t! Insert 4F" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vinsertps($dst$$XMMRegister, $dst$$XMMRegister, $val$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit float insert: extract half, insertps into it, re-insert half.
instruct rvinsert8F(vecY dst, vecY src, vecY tmp, regF val, immU3 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  format %{ "vmovdqu $dst,$src\n\t"
            "vextractf128 $tmp,$src\n\t"
            "vinsertps $tmp,$tmp,$val\n\t"
            "vinsertf128 $dst,$dst,$tmp\t! 
Insert 8F" %}
  ins_encode %{
    uint x_idx = $idx$$constant & right_n_bits(2);
    uint y_idx = ($idx$$constant >> 2) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsertps($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$XMMRegister, x_idx);
    __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit float insert: two-level extract (256-bit half, then 128-bit quarter).
// Fixed: the format was copy-paste residue — it ended with
// "movsbl $dst,$dst\t! Insert 4I" and omitted the 64x4 extract/insert pair;
// it now mirrors the five instructions the encode actually emits.
instruct rvinsert16F(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, regF val, immU4 idx) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1);
  format %{ "evmovdquq $dst,$src\n\t"
            "vextractf64x4 $tmp,$src\n\t"
            "vextractf128 $tmp1,$tmp\n\t"
            "vinsertps $tmp1,$tmp1,$val\n\t"
            "vinsertf128 $tmp,$tmp,$tmp1\n\t"
            "vinsertf64x4 $dst,$dst,$tmp\t! Insert 16F" %}
  ins_encode %{
    uint x_idx = $idx$$constant & right_n_bits(2);
    uint y_idx = ($idx$$constant >> 2) & 1;
    uint z_idx = ($idx$$constant >> 3) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2);
    }
    __ vextractf64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx);
    __ vextractf128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx);
    __ vinsertps($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$XMMRegister, x_idx);
    __ vinsertf128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx);
    __ vinsertf64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx);
  %}
  ins_pipe( pipe_slow );
%}

// Double inserts bounce the value through a GP register (movq) because there
// is no direct XMM-to-XMM pinsrq form. idx is immI0, so the lane is 0.
instruct rvinsert1D(vecD dst, vecD src, regD val, rRegL tmp, immI0 idx) %{
  predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  format %{ "movdqu $dst,$src\n\t"
            "movq $tmp,$val\n\t"
            "pinsrq $dst,$tmp\t! Insert 1D" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert2D(vecX dst, vecX src, regD val, rRegL tmp, immU1 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  // Fixed: format previously read "movq $dst,$src / pinsrq $dst,$dst",
  // which did not correspond to the movq($tmp,$val)/pinsrq($dst,$tmp) encode.
  format %{ "movdqu $dst,$src\n\t"
            "movq $tmp,$val\n\t"
            "pinsrq $dst,$tmp\t! Insert 2D" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert2D_avx(vecX dst, vecX src, regD val, rRegL tmp, immU1 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  format %{ "vmovdqu $dst,$src\n\t"
            "movq $tmp,$val\n\t"
            "vpinsrq $dst,$dst,$tmp\t! 
Insert 2D" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vpinsrq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit double insert: extract half, bounce val through a GP register,
// pinsrq into the half, re-insert it.
// Fixed: format previously said vextracti128/vinserti128; the encode emits
// the float-domain vextractf128/vinsertf128.
instruct rvinsert4D(vecY dst, vecY src, vecY tmp, regD val, rRegL tmp1, immU2 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1);
  format %{ "vmovdqu $dst,$src\n\t"
            "vextractf128 $tmp,$src\n\t"
            "movq $tmp1,$val\n\t"
            "vpinsrq $tmp,$tmp,$tmp1\n\t"
            "vinsertf128 $dst,$dst,$tmp\t! Insert 4D" %}
  ins_encode %{
    uint x_idx = $idx$$constant & 1;
    uint y_idx = ($idx$$constant >> 1) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ movq($tmp1$$Register, $val$$XMMRegister);
    __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$Register, x_idx);
    __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit double insert: two-level extract (256-bit half in $tmp2, 128-bit
// quarter in $tmp), value bounced through GP register $tmp1.
// Fixed: the format previously used $tmp for the 256-bit half (the encode
// uses $tmp2) and showed "vpinsrq $tmp,$tmp,$val" (the encode inserts $tmp1).
instruct rvinsert8D(vecZ dst, vecZ src, vecZ tmp, vecY tmp2, regD val, rRegL tmp1, immU3 idx) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2);
  format %{ "evmovdquq $dst,$src\n\t"
            "vextractf64x4 $tmp2,$src\n\t"
            "vextractf128 $tmp,$tmp2\n\t"
            "movq $tmp1,$val\n\t"
            "vpinsrq $tmp,$tmp,$tmp1\n\t"
            "vinsertf128 $tmp2,$tmp2,$tmp\n\t"
            "vinsertf64x4 $dst,$dst,$tmp2\t! Insert 8D" %}
  ins_encode %{
    uint x_idx = $idx$$constant & 1;
    uint y_idx = ($idx$$constant >> 1) & 1;
    uint z_idx = ($idx$$constant >> 2) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2);
    }
    __ vextractf64x4($tmp2$$XMMRegister, $src$$XMMRegister, z_idx);
    __ vextractf128($tmp$$XMMRegister, $tmp2$$XMMRegister, y_idx);
    __ movq($tmp1$$Register, $val$$XMMRegister);
    __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$Register, x_idx);
    __ vinsertf128($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, y_idx);
    __ vinsertf64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, z_idx);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REDUCTION ARITHMETIC=======================================

// Horizontal add of 8 bytes plus a scalar: fold the upper dword onto the
// lower with pshufd+paddb, then extract and accumulate the four surviving
// byte lanes in a GP register, sign-extending the final result.
instruct rsadd8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{
    "pshufd $tmp,$src2,0x1\n\t"
    "paddb $tmp,$src2\n\t"
    "movzbl $dst,$src1\n\t"
    "pextrb $tmp2,$tmp, 0x0\n\t"
    "addl $dst,$tmp2\n\t"
    "pextrb $tmp2,$tmp, 0x1\n\t"
    "addl $dst,$tmp2\n\t"
    "pextrb $tmp2,$tmp, 0x2\n\t"
    "addl $dst,$tmp2\n\t"
    "pextrb $tmp2,$tmp, 0x3\n\t"
    "addl $dst,$tmp2\n\t"
    "movsbl $dst,$dst\t! add reduction8B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ paddb($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ addl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ addl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2);
    __ addl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ addl($dst$$Register, $tmp2$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): the format below shows a three-operand "paddb $tmp,$tmp,$tmp2";
// SSE paddb is two-operand — confirm against the encode block (not visible in
// this chunk).
instruct rsadd16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "paddb $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "paddb $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "addl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "addl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "addl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "addl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! 
add reduction16B" %} 5818 ins_encode %{ 5819 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5820 __ paddb($tmp$$XMMRegister, $src2$$XMMRegister); 5821 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5822 __ paddb($tmp$$XMMRegister, $tmp2$$XMMRegister); 5823 __ movzbl($dst$$Register, $src1$$Register); 5824 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 5825 __ addl($dst$$Register, $tmp3$$Register); 5826 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 5827 __ addl($dst$$Register, $tmp3$$Register); 5828 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 5829 __ addl($dst$$Register, $tmp3$$Register); 5830 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 5831 __ addl($dst$$Register, $tmp3$$Register); 5832 __ movsbl($dst$$Register, $dst$$Register); 5833 %} 5834 ins_pipe( pipe_slow ); 5835 %} 5836 5837 instruct rvadd32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 5838 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 5839 match(Set dst (AddReductionVI src1 src2)); 5840 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 5841 format %{ "vextracti128_high $tmp,$src2\n\t" 5842 "vpaddb $tmp,$tmp,$src2\n\t" 5843 "pshufd $tmp2,$tmp,0xE\n\t" 5844 "vpaddb $tmp,$tmp,$tmp2\n\t" 5845 "pshufd $tmp2,$tmp,0x1\n\t" 5846 "vpaddb $tmp,$tmp,$tmp2\n\t" 5847 "movzbl $dst,$src1\n\t" 5848 "pextrb $tmp3,$tmp, 0x0\n\t" 5849 "addl $dst,$tmp3\n\t" 5850 "pextrb $tmp3,$tmp, 0x1\n\t" 5851 "addl $dst,$tmp3\n\t" 5852 "pextrb $tmp3,$tmp, 0x2\n\t" 5853 "addl $dst,$tmp3\n\t" 5854 "pextrb $tmp3,$tmp, 0x3\n\t" 5855 "addl $dst,$tmp3\n\t" 5856 "movsbl $dst,$dst\t! 
add reduction32B" %} 5857 ins_encode %{ 5858 int vector_len = 0; 5859 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5860 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5861 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5862 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5863 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5864 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5865 __ movzbl($dst$$Register, $src1$$Register); 5866 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 5867 __ addl($dst$$Register, $tmp3$$Register); 5868 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 5869 __ addl($dst$$Register, $tmp3$$Register); 5870 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 5871 __ addl($dst$$Register, $tmp3$$Register); 5872 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 5873 __ addl($dst$$Register, $tmp3$$Register); 5874 __ movsbl($dst$$Register, $dst$$Register); 5875 %} 5876 ins_pipe( pipe_slow ); 5877 %} 5878 5879 instruct rvadd64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 5880 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 5881 match(Set dst (AddReductionVI src1 src2)); 5882 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 5883 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5884 "vpaddb $tmp2,$tmp2,$src2\n\t" 5885 "vextracti128_high $tmp,$tmp2\n\t" 5886 "vpaddb $tmp,$tmp,$tmp2\n\t" 5887 "pshufd $tmp2,$tmp,0xE\n\t" 5888 "vpaddb $tmp,$tmp,$tmp2\n\t" 5889 "pshufd $tmp2,$tmp,0x1\n\t" 5890 "vpaddb $tmp,$tmp,$tmp2\n\t" 5891 "movzbl $dst,$src1\n\t" 5892 "movdl $tmp3,$tmp\n\t" 5893 "addl $dst,$tmp3\n\t" 5894 "shrl $tmp3,0x8\n\t" 5895 "addl $dst,$tmp3\n\t" 5896 "shrl $tmp3,0x8\n\t" 5897 "addl $dst,$tmp3\n\t" 5898 "shrl $tmp3,0x8\n\t" 5899 "addl $dst,$tmp3\n\t" 5900 "movsbl $dst,$dst\t! 
add reduction64B" %} 5901 ins_encode %{ 5902 int vector_len = 0; 5903 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5904 __ vpaddb($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5905 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5906 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5907 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5908 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5909 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5910 __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5911 __ movzbl($dst$$Register, $src1$$Register); 5912 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 5913 __ addl($dst$$Register, $tmp3$$Register); 5914 __ shrl($tmp3$$Register, 8); 5915 __ addl($dst$$Register, $tmp3$$Register); 5916 __ shrl($tmp3$$Register, 8); 5917 __ addl($dst$$Register, $tmp3$$Register); 5918 __ shrl($tmp3$$Register, 8); 5919 __ addl($dst$$Register, $tmp3$$Register); 5920 __ movsbl($dst$$Register, $dst$$Register); 5921 %} 5922 ins_pipe( pipe_slow ); 5923 %} 5924 5925 instruct rsadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 5926 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5927 match(Set dst (AddReductionVI src1 src2)); 5928 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5929 format %{ 5930 "movdqu $tmp,$src2\n\t" 5931 "phaddw $tmp,$tmp\n\t" 5932 "phaddw $tmp,$tmp\n\t" 5933 "movzwl $dst,$src1\n\t" 5934 "pextrw $tmp2,$tmp, 0x0\n\t" 5935 "addw $dst,$tmp2\n\t" 5936 "movswl $dst,$dst\t! 
add reduction4S" %} 5937 ins_encode %{ 5938 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5939 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5940 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5941 __ movzwl($dst$$Register, $src1$$Register); 5942 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5943 __ addw($dst$$Register, $tmp2$$Register); 5944 __ movswl($dst$$Register, $dst$$Register); 5945 %} 5946 ins_pipe( pipe_slow ); 5947 %} 5948 5949 instruct rvadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 5950 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5951 match(Set dst (AddReductionVI src1 src2)); 5952 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5953 format %{ "vphaddw $tmp,$src2,$src2\n\t" 5954 "vphaddw $tmp,$tmp,$tmp\n\t" 5955 "movzwl $dst,$src1\n\t" 5956 "pextrw $tmp2,$tmp, 0x0\n\t" 5957 "addw $dst,$tmp2\n\t" 5958 "movswl $dst,$dst\t! add reduction4S" %} 5959 ins_encode %{ 5960 int vector_len = 0; 5961 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5962 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5963 __ movzwl($dst$$Register, $src1$$Register); 5964 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5965 __ addw($dst$$Register, $tmp2$$Register); 5966 __ movswl($dst$$Register, $dst$$Register); 5967 %} 5968 ins_pipe( pipe_slow ); 5969 %} 5970 5971 instruct rsadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ 5972 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5973 match(Set dst (AddReductionVI src1 src2)); 5974 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5975 format %{ 5976 "movdqu $tmp,$src2\n\t" 5977 "phaddw $tmp,$tmp\n\t" 5978 "phaddw $tmp,$tmp\n\t" 5979 "phaddw $tmp,$tmp\n\t" 5980 "movzwl $dst,$src1\n\t" 5981 "pextrw $tmp2,$tmp, 0x0\n\t" 5982 "addw $dst,$tmp2\n\t" 5983 "movswl $dst,$dst\t! 
add reduction8S" %} 5984 ins_encode %{ 5985 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5986 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5987 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5988 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5989 __ movzwl($dst$$Register, $src1$$Register); 5990 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5991 __ addw($dst$$Register, $tmp2$$Register); 5992 __ movswl($dst$$Register, $dst$$Register); 5993 %} 5994 ins_pipe( pipe_slow ); 5995 %} 5996 5997 instruct rvadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ 5998 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5999 match(Set dst (AddReductionVI src1 src2)); 6000 effect(TEMP tmp, TEMP tmp2, TEMP dst); 6001 format %{ "vphaddw $tmp,$src2,$src2\n\t" 6002 "vphaddw $tmp,$tmp,$tmp\n\t" 6003 "vphaddw $tmp,$tmp,$tmp\n\t" 6004 "movzwl $dst,$src1\n\t" 6005 "pextrw $tmp2,$tmp, 0x0\n\t" 6006 "addw $dst,$tmp2\n\t" 6007 "movswl $dst,$dst\t! 
add reduction8S" %} 6008 ins_encode %{ 6009 int vector_len = 0; 6010 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 6011 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 6012 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 6013 __ movzwl($dst$$Register, $src1$$Register); 6014 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 6015 __ addw($dst$$Register, $tmp2$$Register); 6016 __ movswl($dst$$Register, $dst$$Register); 6017 %} 6018 ins_pipe( pipe_slow ); 6019 %} 6020 6021 instruct rvadd16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2) %{ 6022 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 6023 match(Set dst (AddReductionVI src1 src2)); 6024 effect(TEMP tmp, TEMP tmp2, TEMP dst); 6025 format %{ "vphaddw $tmp,$src2,$src2\n\t" 6026 "vphaddw $tmp,$tmp,$tmp\n\t" 6027 "vphaddw $tmp,$tmp,$tmp\n\t" 6028 "vphaddw $tmp,$tmp,$tmp\n\t" 6029 "movzwl $dst,$src1\n\t" 6030 "pextrw $tmp2,$tmp, 0x0\n\t" 6031 "addw $dst,$tmp2\n\t" 6032 "movswl $dst,$dst\t! 
add reduction16S" %} 6033 ins_encode %{ 6034 int vector_len = 1; 6035 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 6036 __ vpermq($tmp$$XMMRegister, $tmp$$XMMRegister, 0xD8, vector_len); 6037 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 6038 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 6039 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 6040 __ movzwl($dst$$Register, $src1$$Register); 6041 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 6042 __ addw($dst$$Register, $tmp2$$Register); 6043 __ movswl($dst$$Register, $dst$$Register); 6044 %} 6045 ins_pipe( pipe_slow ); 6046 %} 6047 6048 instruct rvadd32S_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 6049 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 6050 match(Set dst (AddReductionVI src1 src2)); 6051 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 6052 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6053 "vpaddw $tmp2,$tmp2,$src2\n\t" 6054 "vextracti128_high $tmp,$tmp2\n\t" 6055 "vpaddw $tmp,$tmp,$tmp2\n\t" 6056 "pshufd $tmp2,$tmp,0xE\n\t" 6057 "vpaddw $tmp,$tmp,$tmp2\n\t" 6058 "pshufd $tmp2,$tmp,0x1\n\t" 6059 "vpaddw $tmp,$tmp,$tmp2\n\t" 6060 "movdl $tmp3,$tmp\n\t" 6061 "addw $dst,$tmp3\n\t" 6062 "shrl $tmp3,0x16\n\t" 6063 "addw $dst,$tmp3\n\t" 6064 "movswl $dst,$dst\t! 
add reduction32S" %} 6065 ins_encode %{ 6066 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6067 __ vpaddw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6068 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6069 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6070 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 6071 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6072 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6073 __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6074 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 6075 __ movzwl($dst$$Register, $src1$$Register); 6076 __ addw($dst$$Register, $tmp3$$Register); 6077 __ shrl($tmp3$$Register, 16); 6078 __ addw($dst$$Register, $tmp3$$Register); 6079 __ movswl($dst$$Register, $dst$$Register); 6080 %} 6081 ins_pipe( pipe_slow ); 6082 %} 6083 6084 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 6085 predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6086 match(Set dst (AddReductionVI src1 src2)); 6087 effect(TEMP tmp2, TEMP tmp); 6088 format %{ "movdqu $tmp2,$src2\n\t" 6089 "phaddd $tmp2,$tmp2\n\t" 6090 "movd $tmp,$src1\n\t" 6091 "paddd $tmp,$tmp2\n\t" 6092 "movd $dst,$tmp\t! 
add reduction2I" %} 6093 ins_encode %{ 6094 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 6095 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 6096 __ movdl($tmp$$XMMRegister, $src1$$Register); 6097 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 6098 __ movdl($dst$$Register, $tmp$$XMMRegister); 6099 %} 6100 ins_pipe( pipe_slow ); 6101 %} 6102 6103 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 6104 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6105 match(Set dst (AddReductionVI src1 src2)); 6106 effect(TEMP tmp, TEMP tmp2); 6107 format %{ "vphaddd $tmp,$src2,$src2\n\t" 6108 "movd $tmp2,$src1\n\t" 6109 "vpaddd $tmp2,$tmp2,$tmp\n\t" 6110 "movd $dst,$tmp2\t! add reduction2I" %} 6111 ins_encode %{ 6112 int vector_len = 0; 6113 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 6114 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6115 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 6116 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6117 %} 6118 ins_pipe( pipe_slow ); 6119 %} 6120 6121 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 6122 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6123 match(Set dst (AddReductionVI src1 src2)); 6124 effect(TEMP tmp, TEMP tmp2); 6125 format %{ "pshufd $tmp2,$src2,0x1\n\t" 6126 "vpaddd $tmp,$src2,$tmp2\n\t" 6127 "movd $tmp2,$src1\n\t" 6128 "vpaddd $tmp2,$tmp,$tmp2\n\t" 6129 "movd $dst,$tmp2\t! 
add reduction2I" %} 6130 ins_encode %{ 6131 int vector_len = 0; 6132 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6133 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6134 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6135 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6136 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6137 %} 6138 ins_pipe( pipe_slow ); 6139 %} 6140 6141 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 6142 predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6143 match(Set dst (AddReductionVI src1 src2)); 6144 effect(TEMP tmp, TEMP tmp2); 6145 format %{ "movdqu $tmp,$src2\n\t" 6146 "phaddd $tmp,$tmp\n\t" 6147 "phaddd $tmp,$tmp\n\t" 6148 "movd $tmp2,$src1\n\t" 6149 "paddd $tmp2,$tmp\n\t" 6150 "movd $dst,$tmp2\t! add reduction4I" %} 6151 ins_encode %{ 6152 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 6153 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 6154 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 6155 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6156 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 6157 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6158 %} 6159 ins_pipe( pipe_slow ); 6160 %} 6161 6162 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 6163 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6164 match(Set dst (AddReductionVI src1 src2)); 6165 effect(TEMP tmp, TEMP tmp2); 6166 format %{ "vphaddd $tmp,$src2,$src2\n\t" 6167 "vphaddd $tmp,$tmp,$tmp\n\t" 6168 "movd $tmp2,$src1\n\t" 6169 "vpaddd $tmp2,$tmp2,$tmp\n\t" 6170 "movd $dst,$tmp2\t! 
add reduction4I" %} 6171 ins_encode %{ 6172 int vector_len = 0; 6173 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 6174 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 6175 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6176 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 6177 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6178 %} 6179 ins_pipe( pipe_slow ); 6180 %} 6181 6182 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 6183 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6184 match(Set dst (AddReductionVI src1 src2)); 6185 effect(TEMP tmp, TEMP tmp2); 6186 format %{ "pshufd $tmp2,$src2,0xE\n\t" 6187 "vpaddd $tmp,$src2,$tmp2\n\t" 6188 "pshufd $tmp2,$tmp,0x1\n\t" 6189 "vpaddd $tmp,$tmp,$tmp2\n\t" 6190 "movd $tmp2,$src1\n\t" 6191 "vpaddd $tmp2,$tmp,$tmp2\n\t" 6192 "movd $dst,$tmp2\t! add reduction4I" %} 6193 ins_encode %{ 6194 int vector_len = 0; 6195 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 6196 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6197 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6198 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6199 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6200 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6201 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6202 %} 6203 ins_pipe( pipe_slow ); 6204 %} 6205 6206 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 6207 predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6208 match(Set dst (AddReductionVI src1 src2)); 6209 effect(TEMP tmp, TEMP tmp2); 6210 format %{ "vphaddd $tmp,$src2,$src2\n\t" 6211 "vphaddd $tmp,$tmp,$tmp2\n\t" 6212 "vextracti128_high $tmp2,$tmp\n\t" 
6213 "vpaddd $tmp,$tmp,$tmp2\n\t" 6214 "movd $tmp2,$src1\n\t" 6215 "vpaddd $tmp2,$tmp2,$tmp\n\t" 6216 "movd $dst,$tmp2\t! add reduction8I" %} 6217 ins_encode %{ 6218 int vector_len = 1; 6219 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 6220 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6221 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 6222 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6223 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6224 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6225 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6226 %} 6227 ins_pipe( pipe_slow ); 6228 %} 6229 6230 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 6231 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6232 match(Set dst (AddReductionVI src1 src2)); 6233 effect(TEMP tmp, TEMP tmp2); 6234 format %{ "vextracti128_high $tmp,$src2\n\t" 6235 "vpaddd $tmp,$tmp,$src2\n\t" 6236 "pshufd $tmp2,$tmp,0xE\n\t" 6237 "vpaddd $tmp,$tmp,$tmp2\n\t" 6238 "pshufd $tmp2,$tmp,0x1\n\t" 6239 "vpaddd $tmp,$tmp,$tmp2\n\t" 6240 "movd $tmp2,$src1\n\t" 6241 "vpaddd $tmp2,$tmp,$tmp2\n\t" 6242 "movd $dst,$tmp2\t! 
add reduction8I" %} 6243 ins_encode %{ 6244 int vector_len = 0; 6245 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6246 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 6247 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 6248 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6249 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6250 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6251 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6252 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 6253 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6254 %} 6255 ins_pipe( pipe_slow ); 6256 %} 6257 6258 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 6259 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 6260 match(Set dst (AddReductionVI src1 src2)); 6261 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 6262 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 6263 "vpaddd $tmp3,$tmp3,$src2\n\t" 6264 "vextracti128_high $tmp,$tmp3\n\t" 6265 "vpaddd $tmp,$tmp,$tmp3\n\t" 6266 "pshufd $tmp2,$tmp,0xE\n\t" 6267 "vpaddd $tmp,$tmp,$tmp2\n\t" 6268 "pshufd $tmp2,$tmp,0x1\n\t" 6269 "vpaddd $tmp,$tmp,$tmp2\n\t" 6270 "movd $tmp2,$src1\n\t" 6271 "vpaddd $tmp2,$tmp,$tmp2\n\t" 6272 "movd $dst,$tmp2\t! 
mul reduction16I" %} 6273 ins_encode %{ 6274 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 6275 __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 6276 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 6277 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 6278 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 6279 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6280 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 6281 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6282 __ movdl($tmp2$$XMMRegister, $src1$$Register); 6283 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6284 __ movdl($dst$$Register, $tmp2$$XMMRegister); 6285 %} 6286 ins_pipe( pipe_slow ); 6287 %} 6288 6289 #ifdef _LP64 6290 instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 6291 predicate(UseAVX > 2); 6292 match(Set dst (AddReductionVL src1 src2)); 6293 effect(TEMP tmp, TEMP tmp2); 6294 format %{ "pshufd $tmp2,$src2,0xE\n\t" 6295 "vpaddq $tmp,$src2,$tmp2\n\t" 6296 "movdq $tmp2,$src1\n\t" 6297 "vpaddq $tmp2,$tmp,$tmp2\n\t" 6298 "movdq $dst,$tmp2\t! 
add reduction2L" %} 6299 ins_encode %{ 6300 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 6301 __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 6302 __ movdq($tmp2$$XMMRegister, $src1$$Register); 6303 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 6304 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6305 %} 6306 ins_pipe( pipe_slow ); 6307 %} 6308 6309 instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 6310 predicate(UseAVX > 2); 6311 match(Set dst (AddReductionVL src1 src2)); 6312 effect(TEMP tmp, TEMP tmp2); 6313 format %{ "vextracti128_high $tmp,$src2\n\t" 6314 "vpaddq $tmp2,$tmp,$src2\n\t" 6315 "pshufd $tmp,$tmp2,0xE\n\t" 6316 "vpaddq $tmp2,$tmp2,$tmp\n\t" 6317 "movdq $tmp,$src1\n\t" 6318 "vpaddq $tmp2,$tmp2,$tmp\n\t" 6319 "movdq $dst,$tmp2\t! add reduction4L" %} 6320 ins_encode %{ 6321 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6322 __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 6323 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6324 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6325 __ movdq($tmp$$XMMRegister, $src1$$Register); 6326 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6327 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6328 %} 6329 ins_pipe( pipe_slow ); 6330 %} 6331 6332 instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 6333 predicate(UseAVX > 2); 6334 match(Set dst (AddReductionVL src1 src2)); 6335 effect(TEMP tmp, TEMP tmp2); 6336 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 6337 "vpaddq $tmp2,$tmp2,$src2\n\t" 6338 "vextracti128_high $tmp,$tmp2\n\t" 6339 "vpaddq $tmp2,$tmp2,$tmp\n\t" 6340 "pshufd $tmp,$tmp2,0xE\n\t" 6341 "vpaddq $tmp2,$tmp2,$tmp\n\t" 6342 "movdq $tmp,$src1\n\t" 6343 "vpaddq $tmp2,$tmp2,$tmp\n\t" 6344 "movdq $dst,$tmp2\t! 
add reduction8L" %} 6345 ins_encode %{ 6346 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6347 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 6348 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 6349 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6350 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6351 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6352 __ movdq($tmp$$XMMRegister, $src1$$Register); 6353 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 6354 __ movdq($dst$$Register, $tmp2$$XMMRegister); 6355 %} 6356 ins_pipe( pipe_slow ); 6357 %} 6358 #endif 6359 6360 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 6361 predicate(UseSSE >= 1 && UseAVX == 0); 6362 match(Set dst (AddReductionVF dst src2)); 6363 effect(TEMP dst, TEMP tmp); 6364 format %{ "addss $dst,$src2\n\t" 6365 "pshufd $tmp,$src2,0x01\n\t" 6366 "addss $dst,$tmp\t! add reduction2F" %} 6367 ins_encode %{ 6368 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 6369 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6370 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 6371 %} 6372 ins_pipe( pipe_slow ); 6373 %} 6374 6375 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 6376 predicate(UseAVX > 0); 6377 match(Set dst (AddReductionVF dst src2)); 6378 effect(TEMP dst, TEMP tmp); 6379 format %{ "vaddss $dst,$dst,$src2\n\t" 6380 "pshufd $tmp,$src2,0x01\n\t" 6381 "vaddss $dst,$dst,$tmp\t! 
add reduction2F" %} 6382 ins_encode %{ 6383 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6384 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6385 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6386 %} 6387 ins_pipe( pipe_slow ); 6388 %} 6389 6390 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6391 predicate(UseSSE >= 1 && UseAVX == 0); 6392 match(Set dst (AddReductionVF dst src2)); 6393 effect(TEMP dst, TEMP tmp); 6394 format %{ "addss $dst,$src2\n\t" 6395 "pshufd $tmp,$src2,0x01\n\t" 6396 "addss $dst,$tmp\n\t" 6397 "pshufd $tmp,$src2,0x02\n\t" 6398 "addss $dst,$tmp\n\t" 6399 "pshufd $tmp,$src2,0x03\n\t" 6400 "addss $dst,$tmp\t! add reduction4F" %} 6401 ins_encode %{ 6402 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 6403 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6404 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 6405 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6406 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 6407 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6408 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 6409 %} 6410 ins_pipe( pipe_slow ); 6411 %} 6412 6413 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6414 predicate(UseAVX > 0); 6415 match(Set dst (AddReductionVF dst src2)); 6416 effect(TEMP tmp, TEMP dst); 6417 format %{ "vaddss $dst,dst,$src2\n\t" 6418 "pshufd $tmp,$src2,0x01\n\t" 6419 "vaddss $dst,$dst,$tmp\n\t" 6420 "pshufd $tmp,$src2,0x02\n\t" 6421 "vaddss $dst,$dst,$tmp\n\t" 6422 "pshufd $tmp,$src2,0x03\n\t" 6423 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 6424 ins_encode %{ 6425 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6426 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6427 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6428 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6429 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6430 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6431 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6432 %} 6433 ins_pipe( pipe_slow ); 6434 %} 6435 6436 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 6437 predicate(UseAVX > 0); 6438 match(Set dst (AddReductionVF dst src2)); 6439 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6440 format %{ "vaddss $dst,$dst,$src2\n\t" 6441 "pshufd $tmp,$src2,0x01\n\t" 6442 "vaddss $dst,$dst,$tmp\n\t" 6443 "pshufd $tmp,$src2,0x02\n\t" 6444 "vaddss $dst,$dst,$tmp\n\t" 6445 "pshufd $tmp,$src2,0x03\n\t" 6446 "vaddss $dst,$dst,$tmp\n\t" 6447 "vextractf128_high $tmp2,$src2\n\t" 6448 "vaddss $dst,$dst,$tmp2\n\t" 6449 "pshufd $tmp,$tmp2,0x01\n\t" 6450 "vaddss $dst,$dst,$tmp\n\t" 6451 "pshufd $tmp,$tmp2,0x02\n\t" 6452 "vaddss $dst,$dst,$tmp\n\t" 6453 "pshufd $tmp,$tmp2,0x03\n\t" 6454 "vaddss $dst,$dst,$tmp\t! 
add reduction8F" %} 6455 ins_encode %{ 6456 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6457 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6458 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6459 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6460 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6461 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6462 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6463 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6464 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6465 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6466 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6467 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6468 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6469 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6470 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6471 %} 6472 ins_pipe( pipe_slow ); 6473 %} 6474 6475 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 6476 predicate(UseAVX > 2); 6477 match(Set dst (AddReductionVF dst src2)); 6478 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6479 format %{ "vaddss $dst,$dst,$src2\n\t" 6480 "pshufd $tmp,$src2,0x01\n\t" 6481 "vaddss $dst,$dst,$tmp\n\t" 6482 "pshufd $tmp,$src2,0x02\n\t" 6483 "vaddss $dst,$dst,$tmp\n\t" 6484 "pshufd $tmp,$src2,0x03\n\t" 6485 "vaddss $dst,$dst,$tmp\n\t" 6486 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6487 "vaddss $dst,$dst,$tmp2\n\t" 6488 "pshufd $tmp,$tmp2,0x01\n\t" 6489 "vaddss $dst,$dst,$tmp\n\t" 6490 "pshufd $tmp,$tmp2,0x02\n\t" 6491 "vaddss $dst,$dst,$tmp\n\t" 6492 "pshufd $tmp,$tmp2,0x03\n\t" 6493 "vaddss $dst,$dst,$tmp\n\t" 6494 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6495 "vaddss $dst,$dst,$tmp2\n\t" 6496 "pshufd $tmp,$tmp2,0x01\n\t" 6497 "vaddss $dst,$dst,$tmp\n\t" 6498 "pshufd 
$tmp,$tmp2,0x02\n\t" 6499 "vaddss $dst,$dst,$tmp\n\t" 6500 "pshufd $tmp,$tmp2,0x03\n\t" 6501 "vaddss $dst,$dst,$tmp\n\t" 6502 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6503 "vaddss $dst,$dst,$tmp2\n\t" 6504 "pshufd $tmp,$tmp2,0x01\n\t" 6505 "vaddss $dst,$dst,$tmp\n\t" 6506 "pshufd $tmp,$tmp2,0x02\n\t" 6507 "vaddss $dst,$dst,$tmp\n\t" 6508 "pshufd $tmp,$tmp2,0x03\n\t" 6509 "vaddss $dst,$dst,$tmp\t! add reduction16F" %} 6510 ins_encode %{ 6511 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6512 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6513 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6514 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6515 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6516 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6517 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6518 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6519 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6520 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6521 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6522 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6523 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6524 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6525 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6526 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6527 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6528 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6529 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6530 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6531 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6532 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6533 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 
6534 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6535 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6536 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6537 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6538 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6539 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6540 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6541 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6542 %} 6543 ins_pipe( pipe_slow ); 6544 %} 6545 6546 instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6547 predicate(UseSSE >= 1 && UseAVX == 0); 6548 match(Set dst (AddReductionVD dst src2)); 6549 effect(TEMP tmp, TEMP dst); 6550 format %{ "addsd $dst,$src2\n\t" 6551 "pshufd $tmp,$src2,0xE\n\t" 6552 "addsd $dst,$tmp\t! add reduction2D" %} 6553 ins_encode %{ 6554 __ addsd($dst$$XMMRegister, $src2$$XMMRegister); 6555 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6556 __ addsd($dst$$XMMRegister, $tmp$$XMMRegister); 6557 %} 6558 ins_pipe( pipe_slow ); 6559 %} 6560 6561 instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6562 predicate(UseAVX > 0); 6563 match(Set dst (AddReductionVD dst src2)); 6564 effect(TEMP tmp, TEMP dst); 6565 format %{ "vaddsd $dst,$dst,$src2\n\t" 6566 "pshufd $tmp,$src2,0xE\n\t" 6567 "vaddsd $dst,$dst,$tmp\t! 
add reduction2D" %} 6568 ins_encode %{ 6569 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6570 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6571 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6572 %} 6573 ins_pipe( pipe_slow ); 6574 %} 6575 6576 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 6577 predicate(UseAVX > 0); 6578 match(Set dst (AddReductionVD dst src2)); 6579 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6580 format %{ "vaddsd $dst,$dst,$src2\n\t" 6581 "pshufd $tmp,$src2,0xE\n\t" 6582 "vaddsd $dst,$dst,$tmp\n\t" 6583 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6584 "vaddsd $dst,$dst,$tmp2\n\t" 6585 "pshufd $tmp,$tmp2,0xE\n\t" 6586 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 6587 ins_encode %{ 6588 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6589 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6590 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6591 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6592 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6593 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6594 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6595 %} 6596 ins_pipe( pipe_slow ); 6597 %} 6598 6599 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 6600 predicate(UseAVX > 2); 6601 match(Set dst (AddReductionVD dst src2)); 6602 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6603 format %{ "vaddsd $dst,$dst,$src2\n\t" 6604 "pshufd $tmp,$src2,0xE\n\t" 6605 "vaddsd $dst,$dst,$tmp\n\t" 6606 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6607 "vaddsd $dst,$dst,$tmp2\n\t" 6608 "pshufd $tmp,$tmp2,0xE\n\t" 6609 "vaddsd $dst,$dst,$tmp\n\t" 6610 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6611 "vaddsd $dst,$dst,$tmp2\n\t" 6612 "pshufd $tmp,$tmp2,0xE\n\t" 6613 "vaddsd $dst,$dst,$tmp\n\t" 6614 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6615 "vaddsd $dst,$dst,$tmp2\n\t" 6616 "pshufd 
$tmp,$tmp2,0xE\n\t" 6617 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 6618 ins_encode %{ 6619 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6620 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6621 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6622 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6623 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6624 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6625 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6626 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6627 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6628 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6629 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6630 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6631 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6632 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6633 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6634 %} 6635 ins_pipe( pipe_slow ); 6636 %} 6637 6638 instruct rssub2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 6639 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6640 match(Set dst (SubReductionVFP dst src2)); 6641 effect(TEMP dst, TEMP tmp); 6642 format %{ "subss $dst,$src2\n\t" 6643 "pshufd $tmp,$src2,0x01\n\t" 6644 "subss $dst,$dst,$tmp\t! 
sub reduction2F" %} 6645 ins_encode %{ 6646 __ subss($dst$$XMMRegister, $src2$$XMMRegister); 6647 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6648 __ subss($dst$$XMMRegister, $tmp$$XMMRegister); 6649 %} 6650 ins_pipe( pipe_slow ); 6651 %} 6652 6653 instruct rvsub2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 6654 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6655 match(Set dst (SubReductionVFP dst src2)); 6656 effect(TEMP dst, TEMP tmp); 6657 format %{ "vsubss $dst,$dst,$src2\n\t" 6658 "pshufd $tmp,$src2,0x01\n\t" 6659 "vsubss $dst,$dst,$tmp\t! sub reduction2F" %} 6660 ins_encode %{ 6661 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6662 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6663 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6664 %} 6665 ins_pipe( pipe_slow ); 6666 %} 6667 6668 instruct rssub4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6669 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6670 match(Set dst (SubReductionVFP dst src2)); 6671 effect(TEMP dst, TEMP tmp); 6672 format %{ "subss $dst,$src2\n\t" 6673 "pshufd $tmp,$src2,0x01\n\t" 6674 "subss $dst,$tmp\n\t" 6675 "pshufd $tmp,$src2,0x02\n\t" 6676 "subss $dst,$tmp\n\t" 6677 "pshufd $tmp,$src2,0x03\n\t" 6678 "subss $dst,$tmp\t! 
sub reduction4F" %} 6679 ins_encode %{ 6680 __ subss($dst$$XMMRegister, $src2$$XMMRegister); 6681 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6682 __ subss($dst$$XMMRegister, $tmp$$XMMRegister); 6683 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6684 __ subss($dst$$XMMRegister, $tmp$$XMMRegister); 6685 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6686 __ subss($dst$$XMMRegister, $tmp$$XMMRegister); 6687 %} 6688 ins_pipe( pipe_slow ); 6689 %} 6690 6691 instruct rvsub4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6692 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6693 match(Set dst (SubReductionVFP dst src2)); 6694 effect(TEMP tmp, TEMP dst); 6695 format %{ "vsubss $dst,dst,$src2\n\t" 6696 "pshufd $tmp,$src2,0x01\n\t" 6697 "vsubss $dst,$dst,$tmp\n\t" 6698 "pshufd $tmp,$src2,0x02\n\t" 6699 "vsubss $dst,$dst,$tmp\n\t" 6700 "pshufd $tmp,$src2,0x03\n\t" 6701 "vsubss $dst,$dst,$tmp\t! sub reduction4F" %} 6702 ins_encode %{ 6703 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6704 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6705 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6706 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6707 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6708 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6709 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6710 %} 6711 ins_pipe( pipe_slow ); 6712 %} 6713 6714 instruct rsub8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 6715 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6716 match(Set dst (SubReductionVFP dst src2)); 6717 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6718 format %{ "vsubss $dst,$dst,$src2\n\t" 6719 "pshufd $tmp,$src2,0x01\n\t" 6720 "vsubss $dst,$dst,$tmp\n\t" 6721 "pshufd $tmp,$src2,0x02\n\t" 6722 "vsubss $dst,$dst,$tmp\n\t" 6723 
"pshufd $tmp,$src2,0x03\n\t" 6724 "vsubss $dst,$dst,$tmp\n\t" 6725 "vextractf128_high $tmp2,$src2\n\t" 6726 "vsubss $dst,$dst,$tmp2\n\t" 6727 "pshufd $tmp,$tmp2,0x01\n\t" 6728 "vsubss $dst,$dst,$tmp\n\t" 6729 "pshufd $tmp,$tmp2,0x02\n\t" 6730 "vsubss $dst,$dst,$tmp\n\t" 6731 "pshufd $tmp,$tmp2,0x03\n\t" 6732 "vsubss $dst,$dst,$tmp\t! sub reduction8F" %} 6733 ins_encode %{ 6734 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6735 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6736 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6737 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6738 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6739 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6740 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6741 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6742 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6743 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6744 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6745 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6746 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6747 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6748 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6749 %} 6750 ins_pipe( pipe_slow ); 6751 %} 6752 6753 instruct rsub16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 6754 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6755 match(Set dst (SubReductionVFP dst src2)); 6756 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6757 format %{ "vsubss $dst,$dst,$src2\n\t" 6758 "pshufd $tmp,$src2,0x01\n\t" 6759 "vsubss $dst,$dst,$tmp\n\t" 6760 "pshufd $tmp,$src2,0x02\n\t" 6761 "vsubss $dst,$dst,$tmp\n\t" 6762 "pshufd $tmp,$src2,0x03\n\t" 6763 "vsubss $dst,$dst,$tmp\n\t" 6764 "vextractf32x4 
$tmp2,$src2,0x1\n\t" 6765 "vsubss $dst,$dst,$tmp2\n\t" 6766 "pshufd $tmp,$tmp2,0x01\n\t" 6767 "vsubss $dst,$dst,$tmp\n\t" 6768 "pshufd $tmp,$tmp2,0x02\n\t" 6769 "vsubss $dst,$dst,$tmp\n\t" 6770 "pshufd $tmp,$tmp2,0x03\n\t" 6771 "vsubss $dst,$dst,$tmp\n\t" 6772 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6773 "vsubss $dst,$dst,$tmp2\n\t" 6774 "pshufd $tmp,$tmp2,0x01\n\t" 6775 "vsubss $dst,$dst,$tmp\n\t" 6776 "pshufd $tmp,$tmp2,0x02\n\t" 6777 "vsubss $dst,$dst,$tmp\n\t" 6778 "pshufd $tmp,$tmp2,0x03\n\t" 6779 "vsubss $dst,$dst,$tmp\n\t" 6780 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6781 "vsubss $dst,$dst,$tmp2\n\t" 6782 "pshufd $tmp,$tmp2,0x01\n\t" 6783 "vsubss $dst,$dst,$tmp\n\t" 6784 "pshufd $tmp,$tmp2,0x02\n\t" 6785 "vsubss $dst,$dst,$tmp\n\t" 6786 "pshufd $tmp,$tmp2,0x03\n\t" 6787 "vsubss $dst,$dst,$tmp\t! sub reduction16F" %} 6788 ins_encode %{ 6789 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6790 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6791 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6792 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6793 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6794 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6795 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6796 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6797 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6798 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6799 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6800 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6801 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6802 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6803 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6804 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6805 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, 
$tmp2$$XMMRegister); 6806 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6807 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6808 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6809 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6810 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6811 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6812 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6813 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6814 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6815 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6816 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6817 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6818 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6819 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6820 %} 6821 ins_pipe( pipe_slow ); 6822 %} 6823 6824 instruct rssub2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6825 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 6826 match(Set dst (SubReductionVFP dst src2)); 6827 effect(TEMP tmp, TEMP dst); 6828 format %{ "subsd $dst,$src2\n\t" 6829 "pshufd $tmp,$src2,0xE\n\t" 6830 "subsd $dst,$tmp\t! sub reduction2D" %} 6831 ins_encode %{ 6832 __ subsd($dst$$XMMRegister, $src2$$XMMRegister); 6833 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6834 __ subsd($dst$$XMMRegister, $tmp$$XMMRegister); 6835 %} 6836 ins_pipe( pipe_slow ); 6837 %} 6838 6839 instruct rvsub2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6840 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 6841 match(Set dst (SubReductionVFP dst src2)); 6842 effect(TEMP tmp, TEMP dst); 6843 format %{ "vsubsd $dst,$dst,$src2\n\t" 6844 "pshufd $tmp,$src2,0xE\n\t" 6845 "vsubsd $dst,$dst,$tmp\t! 
sub reduction2D" %} 6846 ins_encode %{ 6847 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6848 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6849 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6850 %} 6851 ins_pipe( pipe_slow ); 6852 %} 6853 6854 instruct rvsub4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 6855 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 6856 match(Set dst (SubReductionVFP dst src2)); 6857 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6858 format %{ "vsubsd $dst,$dst,$src2\n\t" 6859 "pshufd $tmp,$src2,0xE\n\t" 6860 "vsubsd $dst,$dst,$tmp\n\t" 6861 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6862 "vsubsd $dst,$dst,$tmp2\n\t" 6863 "pshufd $tmp,$tmp2,0xE\n\t" 6864 "vsubsd $dst,$dst,$tmp\t! sub reduction4D" %} 6865 ins_encode %{ 6866 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6867 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6868 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6869 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6870 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6871 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6872 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6873 %} 6874 ins_pipe( pipe_slow ); 6875 %} 6876 6877 instruct rvsub8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 6878 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 6879 match(Set dst (SubReductionVFP dst src2)); 6880 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6881 format %{ "vsubsd $dst,$dst,$src2\n\t" 6882 "pshufd $tmp,$src2,0xE\n\t" 6883 "vsubsd $dst,$dst,$tmp\n\t" 6884 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6885 "vsubsd $dst,$dst,$tmp2\n\t" 6886 "pshufd $tmp,$tmp2,0xE\n\t" 6887 "vsubsd $dst,$dst,$tmp\n\t" 6888 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6889 "vsubsd $dst,$dst,$tmp2\n\t" 6890 "pshufd 
$tmp,$tmp2,0xE\n\t" 6891 "vsubsd $dst,$dst,$tmp\n\t" 6892 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6893 "vsubsd $dst,$dst,$tmp2\n\t" 6894 "pshufd $tmp,$tmp2,0xE\n\t" 6895 "vsubsd $dst,$dst,$tmp\t! sub reduction8D" %} 6896 ins_encode %{ 6897 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6898 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6899 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6900 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6901 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6902 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6903 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6904 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6905 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6906 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6907 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6908 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6909 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6910 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6911 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6912 %} 6913 ins_pipe( pipe_slow ); 6914 %} 6915 6916 instruct rsmul8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 6917 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6918 match(Set dst (MulReductionVI src1 src2)); 6919 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 6920 format %{ "pmovsxbw $tmp,$src2\n\t" 6921 "pshufd $tmp1,$tmp,0xE\n\t" 6922 "pmullw $tmp,$tmp1\n\t" 6923 "pshufd $tmp1,$tmp,0x1\n\t" 6924 "pmullw $tmp,$tmp1\n\t" 6925 "pextrw $tmp2,$tmp, 0x1\n\t" 6926 "pextrw $tmp3,$tmp, 0x0\n\t" 6927 "imul $tmp2,$tmp3 \n\t" 6928 "movsbl $dst,$src1\n\t" 6929 "imull $dst,$tmp2\n\t" 6930 "movsbl $dst,$dst\t! 
mul reduction8B" %} 6931 ins_encode %{ 6932 __ pmovsxbw($tmp$$XMMRegister, $src2$$XMMRegister); 6933 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 6934 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6935 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 6936 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6937 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 6938 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6939 __ imull($tmp2$$Register, $tmp3$$Register); 6940 __ movsbl($dst$$Register, $src1$$Register); 6941 __ imull($dst$$Register, $tmp2$$Register); 6942 __ movsbl($dst$$Register, $dst$$Register); 6943 %} 6944 ins_pipe( pipe_slow ); 6945 %} 6946 6947 instruct rsmul16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 6948 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6949 match(Set dst (MulReductionVI src1 src2)); 6950 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 6951 format %{ "pmovsxbw $tmp,$src2\n\t" 6952 "pshufd $tmp1,$src2,0xEE\n\t" 6953 "pmovsxbw $tmp1,$tmp1\n\t" 6954 "pmullw $tmp,$tmp1\n\t" 6955 "pshufd $tmp1,$tmp,0xE\n\t" 6956 "pmullw $tmp,$tmp1\n\t" 6957 "pshufd $tmp1,$tmp,0x1\n\t" 6958 "pmullw $tmp,$tmp1\n\t" 6959 "pextrw $tmp2,$tmp, 0x1\n\t" 6960 "pextrw $tmp3,$tmp, 0x0\n\t" 6961 "imull $tmp2,$tmp3 \n\t" 6962 "movsbl $dst,$src1\n\t" 6963 "imull $dst,$tmp2\n\t" 6964 "movsbl $dst,$dst\t! 
mul reduction16B" %} 6965 ins_encode %{ 6966 int vector_len = 0; 6967 __ pmovsxbw($tmp$$XMMRegister, $src2$$XMMRegister); 6968 __ pshufd($tmp1$$XMMRegister, $src2$$XMMRegister, 0xEE); 6969 __ pmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister); 6970 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6971 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 6972 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6973 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 6974 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6975 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 6976 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6977 __ imull($tmp2$$Register, $tmp3$$Register); 6978 __ movsbl($dst$$Register, $src1$$Register); 6979 __ imull($dst$$Register, $tmp2$$Register); 6980 __ movsbl($dst$$Register, $dst$$Register); 6981 %} 6982 ins_pipe( pipe_slow ); 6983 %} 6984 6985 instruct rvmul32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 6986 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6987 match(Set dst (MulReductionVI src1 src2)); 6988 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 6989 format %{ "vextracti128_high $tmp,$src2\n\t" 6990 "pmovsxbw $tmp,$tmp\n\t" 6991 "pmovsxbw $tmp1,$src2\n\t" 6992 "vpmullw $tmp,$tmp,$tmp1\n\t" 6993 "vextracti128_high $tmp1,$tmp\n\t" 6994 "vpmullw $tmp,$tmp,$tmp1\n\t" 6995 "pshufd $tmp1,$tmp,0xE\n\t" 6996 "vpmullw $tmp,$tmp,$tmp1\n\t" 6997 "pshufd $tmp1,$tmp,0x1\n\t" 6998 "vpmullw $tmp,$tmp,$tmp1\n\t" 6999 "pextrw $tmp2,$tmp, 0x1\n\t" 7000 "pextrw $tmp3,$tmp, 0x0\n\t" 7001 "imull $tmp2,$tmp3 \n\t" 7002 "movsbl $dst,$src1\n\t" 7003 "imull $dst,$tmp2\n\t" 7004 "movsbl $dst,$dst\t! 
mul reduction32B" %} 7005 ins_encode %{ 7006 int vector_len = 1; 7007 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 7008 __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 7009 __ vpmovsxbw($tmp1$$XMMRegister, $src2$$XMMRegister, vector_len); 7010 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7011 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 7012 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7013 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 7014 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7015 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7016 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7017 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7018 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7019 __ imull($tmp2$$Register, $tmp3$$Register); 7020 __ movsbl($dst$$Register, $src1$$Register); 7021 __ imull($dst$$Register, $tmp2$$Register); 7022 __ movsbl($dst$$Register, $dst$$Register); 7023 %} 7024 ins_pipe( pipe_slow ); 7025 %} 7026 7027 instruct rvmul64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 7028 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 7029 match(Set dst (MulReductionVI src1 src2)); 7030 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 7031 format %{ "vextracti64x4_high $tmp,$src2\n\t" 7032 "vpmovsxbw $tmp,$tmp\n\t" 7033 "vpmovsxbw $tmp1,$src2\n\t" 7034 "vpmullw $tmp,$tmp,$tmp1\n\t" 7035 "vextracti64x4_high $tmp1,$tmp\n\t" 7036 "vpmullw $tmp,$tmp,$tmp1\n\t" 7037 "vextracti128_high $tmp1,$tmp\n\t" 7038 "vpmullw $tmp,$tmp,$tmp1\n\t" 7039 "pshufd $tmp1,$tmp,0xE\n\t" 7040 "vpmullw $tmp,$tmp,$tmp1\n\t" 7041 "pshufd $tmp1,$tmp,0x1\n\t" 7042 "vpmullw $tmp,$tmp,$tmp1\n\t" 7043 "pextrw $tmp2,$tmp, 0x1\n\t" 7044 "pextrw $tmp3,$tmp, 
0x0\n\t" 7045 "imull $tmp2,$tmp3 \n\t" 7046 "movsbl $dst,$src1\n\t" 7047 "imull $dst,$tmp2\n\t" 7048 "movsbl $dst,$dst\t! mul reduction64B" %} 7049 ins_encode %{ 7050 int vector_len = 2; 7051 __ vextracti64x4_high($tmp$$XMMRegister, $src2$$XMMRegister); 7052 __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 7053 __ vpmovsxbw($tmp1$$XMMRegister, $src2$$XMMRegister, vector_len); 7054 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7055 __ vextracti64x4_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 7056 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 1); 7057 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 7058 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7059 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 7060 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7061 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7062 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7063 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7064 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7065 __ imull($tmp2$$Register, $tmp3$$Register); 7066 __ movsbl($dst$$Register, $src1$$Register); 7067 __ imull($dst$$Register, $tmp2$$Register); 7068 __ movsbl($dst$$Register, $dst$$Register); 7069 %} 7070 ins_pipe( pipe_slow ); 7071 %} 7072 7073 instruct rsmul4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ 7074 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 7075 match(Set dst (MulReductionVI src1 src2)); 7076 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3); 7077 format %{ "pshufd $tmp,$src2,0x1\n\t" 7078 "pmullw $tmp,$src2\n\t" 7079 "pextrw $tmp2,$tmp, 0x1\n\t" 7080 "pextrw $tmp3,$tmp, 0x0\n\t" 7081 "imull $tmp2,$tmp3 \n\t" 7082 "movswl $dst,$src1\n\t" 7083 "imull $dst,$tmp2\n\t" 7084 "movswl $dst,$dst\t! 
mul reduction4S" %} 7085 ins_encode %{ 7086 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 7087 __ pmullw($tmp$$XMMRegister, $src2$$XMMRegister); 7088 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7089 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7090 __ imull($tmp2$$Register, $tmp3$$Register); 7091 __ movswl($dst$$Register, $src1$$Register); 7092 __ imull($dst$$Register, $tmp2$$Register); 7093 __ movswl($dst$$Register, $dst$$Register); 7094 %} 7095 ins_pipe( pipe_slow ); 7096 %} 7097 7098 instruct rsmul8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 7099 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 7100 match(Set dst (MulReductionVI src1 src2)); 7101 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 7102 format %{ "pshufd $tmp,$src2,0xE\n\t" 7103 "pmullw $tmp,$src2\n\t" 7104 "pshufd $tmp1,$tmp,0x1\n\t" 7105 "pmullw $tmp,$tmp1\n\t" 7106 "pextrw $tmp2,$tmp, 0x1\n\t" 7107 "pextrw $tmp3,$tmp, 0x0\n\t" 7108 "imul $tmp2,$tmp3 \n\t" 7109 "movswl $dst,$src1\n\t" 7110 "imull $dst,$tmp2\n\t" 7111 "movswl $dst,$dst\t! 
mul reduction8S" %} 7112 ins_encode %{ 7113 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 7114 __ pmullw($tmp$$XMMRegister, $src2$$XMMRegister); 7115 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7116 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 7117 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7118 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7119 __ imull($tmp2$$Register, $tmp3$$Register); 7120 __ movswl($dst$$Register, $src1$$Register); 7121 __ imull($dst$$Register, $tmp2$$Register); 7122 __ movswl($dst$$Register, $dst$$Register); 7123 %} 7124 ins_pipe( pipe_slow ); 7125 %} 7126 7127 instruct rvmul16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 7128 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 7129 match(Set dst (MulReductionVI src1 src2)); 7130 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 7131 format %{ "vextracti128_high $tmp,$src2\n\t" 7132 "vpmullw $tmp,$tmp,$src2\n\t" 7133 "pshufd $tmp1,$tmp,0xE\n\t" 7134 "pmullw $tmp,$tmp1\n\t" 7135 "pshufd $tmp1,$tmp,0x1\n\t" 7136 "pmullw $tmp,$tmp1\n\t" 7137 "pextrw $tmp2,$tmp, 0x1\n\t" 7138 "pextrw $tmp3,$tmp, 0x0\n\t" 7139 "imul $tmp2,$tmp3 \n\t" 7140 "movswl $dst,$src1\n\t" 7141 "imull $dst,$tmp2\n\t" 7142 "movswl $dst,$dst\t! 
mul reduction16S" %} 7143 ins_encode %{ 7144 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 7145 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 1); 7146 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 7147 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7148 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7149 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7150 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7151 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7152 __ imull($tmp2$$Register, $tmp3$$Register); 7153 __ movswl($dst$$Register, $src1$$Register); 7154 __ imull($dst$$Register, $tmp2$$Register); 7155 __ movswl($dst$$Register, $dst$$Register); 7156 %} 7157 ins_pipe( pipe_slow ); 7158 %} 7159 7160 instruct rvmul32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 7161 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 7162 match(Set dst (MulReductionVI src1 src2)); 7163 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 7164 format %{ "vextracti64x4_high $tmp1,$src2\n\t" 7165 "vpmullw $tmp1,$tmp1,$src2\n\t" 7166 "vextracti128_high $tmp,$tmp1\n\t" 7167 "vpmullw $tmp,$tmp,$tmp1\n\t" 7168 "pshufd $tmp1,$tmp,0xE\n\t" 7169 "pmullw $tmp,$tmp1\n\t" 7170 "pshufd $tmp1,$tmp,0x1\n\t" 7171 "pmullw $tmp,$tmp1\n\t" 7172 "pextrw $tmp2,$tmp, 0x1\n\t" 7173 "pextrw $tmp3,$tmp, 0x0\n\t" 7174 "imul $tmp2,$tmp3 \n\t" 7175 "movswl $dst,$src1\n\t" 7176 "imull $dst,$tmp2\n\t" 7177 "movswl $dst,$dst\t! 
mul reduction32S" %} 7178 ins_encode %{ 7179 int vector_len = 0; 7180 __ vextracti64x4_high($tmp1$$XMMRegister, $src2$$XMMRegister); 7181 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $src2$$XMMRegister, 1); 7182 __ vextracti128_high($tmp$$XMMRegister, $tmp1$$XMMRegister); 7183 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7184 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 7185 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7186 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7187 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7188 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7189 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7190 __ imull($tmp2$$Register, $tmp3$$Register); 7191 __ movswl($dst$$Register, $src1$$Register); 7192 __ imull($dst$$Register, $tmp2$$Register); 7193 __ movswl($dst$$Register, $dst$$Register); 7194 %} 7195 ins_pipe( pipe_slow ); 7196 %} 7197 7198 7199 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 7200 predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 7201 match(Set dst (MulReductionVI src1 src2)); 7202 effect(TEMP tmp, TEMP tmp2); 7203 format %{ "pshufd $tmp2,$src2,0x1\n\t" 7204 "pmulld $tmp2,$src2\n\t" 7205 "movd $tmp,$src1\n\t" 7206 "pmulld $tmp2,$tmp\n\t" 7207 "movd $dst,$tmp2\t! 
mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 64-bit vector of 2 ints, AVX encoding.
instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 128-bit vector of 4 ints, SSE4.1 encoding (no AVX).
instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  // Fix: the predicate tested "UseAVX == 0" twice; a single test suffices
  // and matches the other SSE-only rsmul*_reduction_reg rules.
  predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pmulld $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 128-bit vector of 4 ints, AVX encoding.
instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 256-bit vector of 8 ints, AVX encoding.
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld
$tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 512-bit vector of 16 ints (EVEX, AVX-512 only).
instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  // Fix: the fourth format line used to print "vpmulld $tmp,$tmp,$src2"
  // although the generated code multiplies by $tmp3 (the folded high half).
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// Multiply reduction of a 128-bit vector of 2 longs (needs AVX512DQ vpmullq).
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpmullq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpmullq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t!
mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 256-bit vector of 4 longs (needs AVX512DQ vpmullq).
instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    // Fold 256 -> 128 -> 64 bits, then multiply in the scalar input src1.
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 512-bit vector of 8 longs (needs AVX512DQ vpmullq).
instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// Multiply reduction of a 64-bit vector of 2 floats, SSE encoding (no AVX).
// dst carries the running product; FP multiply order is preserved.
instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 64-bit vector of 2 floats, AVX encoding.
instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t!
mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 128-bit vector of 4 floats, SSE encoding (no AVX).
// Lanes are multiplied into dst strictly left to right: FP multiplication
// is not associative, so no pairwise tree reduction is used here.
instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 128-bit vector of 4 floats, AVX encoding.
instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 256-bit vector of 8 floats, AVX encoding.
// The high 128-bit lane is extracted into tmp2 and its floats multiplied
// into dst one at a time, preserving strict order.
instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t!
mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 512-bit vector of 16 floats (EVEX, AVX-512 only).
// Each of the three upper 128-bit lanes is extracted in turn and its four
// floats multiplied into dst one by one, keeping strict left-to-right order
// (FP multiplication is not associative).
instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 128-bit vector of 2 doubles, SSE encoding (no AVX).
instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 128-bit vector of 2 doubles, AVX encoding.
instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t!
mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 256-bit vector of 4 doubles, AVX encoding.
// Lanes are multiplied into dst strictly in order (FP mul is not associative).
instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply reduction of a 512-bit vector of 8 doubles (EVEX, AVX-512 only).
instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  // Fix: the second pshufd line used to print "$tmp,$src2,0xE" although the
  // generated code shuffles $tmp2 (the extracted 128-bit lane), not $src2.
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min Reduction --------------------
// Signed-min reduction of a 64-bit vector of 8 bytes, SSE4.1 encoding.
instruct rsmin8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  // Fix: the format now matches the generated code - cmovl takes two
  // operands (stray ", 0x0" dropped), the first movl copies $tmp3 (not
  // $tmp2), and the second pextrb pair prints its lane indices 0x3/0x2.
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pminsb $tmp,$src2\n\t"
            "pextrb $tmp2,$tmp, 0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
"movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $tmp3,$dst\n\t"
            "cmovl $dst,$tmp3\t! min reduction8B" %}
  ins_encode %{
    // Pairwise byte-min of the two 32-bit halves, then scalar min of the
    // surviving lanes and src1 via cmp/cmov; final movsbl re-sign-extends.
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1);
    __ pminsb($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Signed-min reduction of a 128-bit vector of 16 bytes, SSE4.1 encoding.
instruct rsmin16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  // Fix: format now matches the generated code (cmovl operand garbage and
  // pextrb lane indices corrected) and the trailing tag says 16B, not 4S.
  format %{ "pshufd $tmp4,$src2,0xE\n\t"
            "pminsb $tmp4,$src2\n\t"
            "pshufd $tmp,$tmp4,0x1\n\t"
            "pminsb $tmp,$tmp4\n\t"
            "pextrb $tmp2,$tmp, 0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $tmp3,$dst\n\t"
            "cmovl $dst,$tmp3\t! min reduction16B" %}
  ins_encode %{
    __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pminsb($tmp4$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1);
    __ pminsb($tmp$$XMMRegister, $tmp4$$XMMRegister);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Signed-min reduction of a 128-bit vector of 16 bytes, AVX encoding.
instruct rvmin16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  // Fix: the first vpminsb line used to print "$tmp,$tmp4,$src2" although
  // the generated code computes into $tmp4 ($tmp4 = min($tmp4,$src2)).
  format %{ "pshufd $tmp4,$src2,0xE\n\t"
            "vpminsb $tmp4,$tmp4,$src2\n\t"
            "pshufd
$tmp,$tmp4,0x1\n\t" 7801 "vpminsb $tmp,$tmp,$tmp4\n\t" 7802 "pextrb $tmp2,$tmp, 0x1\n\t" 7803 "movsbl $tmp2,$tmp2\n\t" 7804 "pextrb $tmp3,$tmp,0x0\n\t" 7805 "movsbl $tmp3,$tmp3\n\t" 7806 "cmpl $tmp2,$tmp3\n\t" 7807 "cmovl $tmp3,$tmp2\n\t" 7808 "cmpl $src1,$tmp3\n\t" 7809 "cmovl $tmp3,$src1, 0x0\n\t" 7810 "movl $dst,$tmp2\n\t" 7811 "pextrb $tmp2,$tmp\n\t" 7812 "movsbl $tmp2,$tmp2\n\t" 7813 "pextrb $tmp3,$tmp\n\t" 7814 "movsbl $tmp3,$tmp3\n\t" 7815 "cmpl $tmp2,$tmp3\n\t" 7816 "cmovl $tmp3,$tmp2\n\t" 7817 "cmpl $tmp3,$dst\n\t" 7818 "cmovl $dst,$tmp3\t! min reduction4S" %} 7819 ins_encode %{ 7820 int vector_len = 0; 7821 __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE); 7822 __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 0); 7823 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 7824 __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 7825 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 7826 __ movsbl($tmp2$$Register, $tmp2$$Register); 7827 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 7828 __ movsbl($tmp3$$Register, $tmp3$$Register); 7829 __ cmpl($tmp2$$Register, $tmp3$$Register); 7830 __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register); 7831 __ cmpl($src1$$Register, $tmp3$$Register); 7832 __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register); 7833 __ movl($dst$$Register, $tmp3$$Register); 7834 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 7835 __ movsbl($tmp2$$Register, $tmp2$$Register); 7836 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 7837 __ movsbl($tmp3$$Register, $tmp3$$Register); 7838 __ cmpl($tmp2$$Register, $tmp3$$Register); 7839 __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register); 7840 __ cmpl($tmp3$$Register, $dst$$Register); 7841 __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register); 7842 __ movsbl($dst$$Register, $dst$$Register); 7843 %} 7844 ins_pipe( pipe_slow ); 7845 %} 7846 7847 instruct rvmin32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF 
tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 7848 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 7849 match(Set dst (MinReductionV src1 src2)); 7850 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 7851 format %{ "vextracti128_high $tmp,$src2\n\t" 7852 "vpminsb $tmp,$tmp,$src2\n\t" 7853 "pshufd $tmp4,$tmp,0xE\n\t" 7854 "vpminsb $tmp4,$tmp4,$tmp\n\t" 7855 "pshufd $tmp,$tmp4,0x1\n\t" 7856 "vpminsb $tmp,$tmp,$tmp4\n\t" 7857 "pextrb $tmp2,$tmp, 0x1\n\t" 7858 "movsbl $tmp2,$tmp2\n\t" 7859 "pextrb $tmp3,$tmp,0x0\n\t" 7860 "movsbl $tmp3,$tmp3\n\t" 7861 "cmpl $tmp2,$tmp3\n\t" 7862 "cmovl $tmp3,$tmp2\n\t" 7863 "cmpl $src1,$tmp3\n\t" 7864 "cmovl $tmp3,$src1, 0x0\n\t" 7865 "movl $dst,$tmp2\n\t" 7866 "pextrb $tmp2,$tmp\n\t" 7867 "movsbl $tmp2,$tmp2\n\t" 7868 "pextrb $tmp3,$tmp\n\t" 7869 "movsbl $tmp3,$tmp3\n\t" 7870 "cmpl $tmp2,$tmp3\n\t" 7871 "cmovl $tmp3,$tmp2\n\t" 7872 "cmpl $tmp3,$dst\n\t" 7873 "cmovl $dst,$tmp3\t! min reduction4S" %} 7874 ins_encode %{ 7875 int vector_len = 1; 7876 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 7877 __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 7878 __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE); 7879 __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0); 7880 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 7881 __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 7882 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 7883 __ movsbl($tmp2$$Register, $tmp2$$Register); 7884 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 7885 __ movsbl($tmp3$$Register, $tmp3$$Register); 7886 __ cmpl($tmp2$$Register, $tmp3$$Register); 7887 __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register); 7888 __ cmpl($src1$$Register, $tmp3$$Register); 7889 __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register); 7890 __ movl($dst$$Register, $tmp3$$Register); 7891 __ pextrb($tmp2$$Register, 
$tmp$$XMMRegister,0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 64 signed bytes (512-bit vector): fold the vector down to
// one byte lane with vpminsb, then combine the last two lanes and the scalar
// src1 in integer registers with cmp/cmov.
// NOTE(review): 512-bit vpminsb needs AVX512BW, but the predicate only checks
// UseAVX > 2 -- confirm against VM_Version feature flags.
instruct rvmin64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  // Format rewritten to mirror ins_encode: the old text carried a stray third
  // cmovl operand (", 0x0"), printed "movl $dst,$tmp2" where $tmp3 is moved,
  // dropped pextrb immediates, and was labelled "min reduction4S".
  format %{ "vextracti64x4_high $tmp4,$src2\n\t"
            "vpminsb $tmp4,$tmp4,$src2\n\t"
            "vextracti128_high $tmp,$tmp4\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pshufd $tmp4,$tmp,0xE\n\t"
            "vpminsb $tmp4,$tmp4,$tmp\n\t"
            "pshufd $tmp,$tmp4,0x1\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pextrb $tmp2,$tmp,0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $tmp3,$dst\n\t"
            "cmovl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! min reduction64B" %}
  ins_encode %{
    __ vextracti64x4_high($tmp4$$XMMRegister, $src2$$XMMRegister);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 2);
    __ vextracti128_high($tmp$$XMMRegister, $tmp4$$XMMRegister);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 1);
    __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 4 signed shorts (SSE2).
instruct rsmin4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pminsw $tmp,$src2\n\t"
"pextrw $tmp2,$tmp, 0x1\n\t"
            "movswl $tmp2,$tmp2\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp2,$tmp3\n\t"
            "cmovl $tmp3,$tmp2\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t! min reduction4S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1);
    __ pminsw($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister,0x1);
    __ movswl($tmp2$$Register, $tmp2$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 8 signed shorts (SSE2).
instruct rsmin8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  // Format fixed to print the registers the encoding really uses: the scalar
  // tail works in $tmp4/$tmp3 (the old text printed the XMM temp $tmp2 and
  // omitted the pextrw immediate).
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pminsw $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pminsw $tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp, 0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp4,$tmp3\n\t"
            "cmovl $tmp3,$tmp4\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t! min reduction8S" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister,0xE);
    __ pminsw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1);
    __ pminsw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 8 signed shorts (AVX).
instruct rvmin8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  // Format rewritten to mirror ins_encode; the old text described a
  // movzwl/vpminsw-on-$dst sequence that is never emitted.
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpminsw $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp, 0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp4,$tmp3\n\t"
            "cmovl $tmp3,$tmp4\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t!
min reduction8S" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 16 signed shorts (AVX2).
instruct rvmin16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  // Format fixed: missing comma in "cmpl $tmp2$tmp3" and the scalar tail
  // prints $tmp4 (the register the encoding uses), not $tmp2.
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpminsw $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp, 0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp4,$tmp3\n\t"
            "cmovl $tmp3,$tmp4\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t! min reduction16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 32 signed shorts (AVX-512).
// NOTE(review): the narrower vpminsw steps below are emitted with
// vector_len = 2; only the low lanes are consumed afterwards -- confirm.
instruct rvmin32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  // Format rewritten to mirror ins_encode; the old text had duplicated lines,
  // a missing comma, and printed registers the encoding never touches.
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpminsw $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp, 0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl $tmp4,$tmp3\n\t"
            "cmovl $tmp3,$tmp4\n\t"
            "cmpl $src1,$tmp3\n\t"
            "cmovl $tmp3,$src1\n\t"
            "movl $dst,$tmp3\t!
min reduction32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpminsw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1);
    __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 2 ints (SSE4.1 pminsd).
instruct rsmin2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pminsd $tmp,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "pminsd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! min reduction2I" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pminsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ pminsd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 2 ints (AVX).
instruct rvmin2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  // Format fixed: the first vpminsd writes $tmp (not $tmp2) and the scalar
  // src1 is loaded into $tmp2 (not $tmp), matching ins_encode.
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "vpminsd $tmp,$tmp,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! min reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 4 ints (SSE4.1).
instruct rsmin4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pminsd $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pminsd $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pminsd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t!
min reduction4I" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pminsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pminsd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pminsd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 4 ints (AVX): halve the vector twice with vpminsd,
// then fold in the scalar src1.
instruct rvmin4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpminsd $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpminsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! min reduction4I" %}
  ins_encode %{
    const int vlen = 0;  // 128-bit encoding
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vlen);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ vpminsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vlen);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vlen);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 4 ints, EVEX targets.
// NOTE(review): body is identical to rvmin4I_reduction_reg and both
// predicates hold when UseAVX > 2 -- confirm the intended rule selection.
instruct rvmin4I_reduction_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpminsd $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpminsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! min reduction4I" %}
  ins_encode %{
    const int vlen = 0;  // 128-bit encoding
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vlen);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ vpminsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vlen);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vlen);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 8 ints (AVX2): fold the high 128-bit lane into the low
// one, then reduce as in the 4-int case.
instruct rvmin8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpminsd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpminsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t!
min reduction8I" %}
  ins_encode %{
    const int vlen = 1;  // 256-bit encoding
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vlen);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vlen);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ vpminsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vlen);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vlen);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 8 ints, EVEX targets.
// NOTE(review): identical to rvmin8I_reduction_reg; both predicates hold
// when UseAVX > 2 -- confirm the intended rule selection.
instruct rvmin8I_reduction_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpminsd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpminsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! min reduction8I" %}
  ins_encode %{
    const int vlen = 1;  // 256-bit encoding
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vlen);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vlen);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ vpminsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vlen);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vlen);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 16 ints (AVX-512): fold 512 -> 256 -> 128 bits, then
// reduce the final 128-bit lane and combine with the scalar src1.
instruct rvmin16I_reduction_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpminsd $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpminsd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpminsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpminsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t!
min reduction16I" %}
  ins_encode %{
    const int vlen = 2;  // 512-bit encoding
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpminsd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vlen);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vlen);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vlen);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ vpminsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vlen);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vlen);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long Min Reduction

// Min of scalar src1 and the single long element of src2 (SSE4.2).
// blendvpd reads its mask implicitly from xmm0, hence the rxmm0 temp.
instruct rsmin1L_reduction_reg(rRegL dst, rRegL src1, vecD src2, rxmm0 tmp, regF tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdq $tmp,$src1\n\t"
            "movdq $tmp2,$src1\n\t"
            "pcmpgtq $tmp,$src2\n\t"
            "blendvpd $tmp2,$src2\n\t"
            "movdq $dst,$tmp2\t! min reduction1L" %}
  ins_encode %{
    // mask = (src1 > src2); tmp2 starts as src1 and takes src2 where the
    // mask is set, i.e. tmp2 = min(src1, src2).
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister);
    __ blendvpd($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 2 longs (SSE4.2, no AVX): combine the two lanes with a
// pcmpgtq/blendvpd select, then fold in the scalar src1 the same way.
instruct rsmin2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, rxmm0 xmm_0, regF tmp2, regF tmp3) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp3,$src2,0xE\n\t"
            "movdqu $xmm_0,$src2\n\t"
            "movdqu $tmp2,$src2\n\t"
            "pcmpgtq $xmm_0,$tmp3\n\t"
            "blendvpd $tmp2,$tmp3\n\t"
            "movdqu $xmm_0,$tmp2\n\t"
            "movdq $tmp3,$src1\n\t"
            "pcmpgtq $xmm_0,$tmp3\n\t"
            "blendvpd $tmp2,$tmp3\n\t"
            "movq $dst,$tmp2\t! min reduction2L" %}
  ins_encode %{
    __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ movdqu($xmm_0$$XMMRegister, $src2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister);
    __ blendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ movdqu($xmm_0$$XMMRegister, $tmp2$$XMMRegister);
    __ movdq($tmp3$$XMMRegister, $src1$$Register);
    __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister);
    __ blendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 2 longs (AVX).
instruct rvmin2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpcmpgtq
$tmp,$tmp2,$src2\n\t"
            "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpcmpgtq $tmp3,$tmp2,$tmp\n\t"
            "vblendvpd $tmp2,$tmp2,$tmp,$tmp3\n\t"
            "movq $dst,$tmp2\t! min reduction2L" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister,$src1$$Register);
    __ vpcmpgtq($tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    // Blend with the scalar operand loaded into $tmp above; the old code
    // passed $src1 (a general-purpose register) as an XMMRegister here.
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 4 longs (AVX2).
instruct rvmin4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  // Format fixed to mirror ins_encode: wrong shuffle immediate (0x1 vs the
  // emitted 0xE), a stray "2" after \n\t, wrong operand order in the last
  // compare/blend, and a "reduction2L" label.
  format %{ "vextracti128_high $tmp2,$src2\n\t"
            "vpcmpgtq $tmp,$tmp2,$src2\n\t"
            "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t"
            "pshufd $tmp3,$tmp2,0xE\n\t"
            "vpcmpgtq $tmp,$tmp3,$tmp2\n\t"
            "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpcmpgtq $tmp2,$tmp3,$tmp\n\t"
            "vblendvpd $tmp2,$tmp3,$tmp,$tmp2\n\t"
            "movq $dst,$tmp2\t! min reduction4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vblendvpd($tmp3$$XMMRegister,$tmp3$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister,$src1$$Register);
    __ vpcmpgtq($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister,$tmp$$XMMRegister,$tmp2$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 8 longs (AVX-512).
instruct rvmin8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  // "vpshufd ...,0x1" corrected to the pshufd 0xE the encoding emits.
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpcmpgtq $tmp,$tmp3,$src2\n\t"
            "vblendvpd $tmp3,$tmp3,$src2,$tmp\n\t"
            "vextracti128_high $tmp2,$tmp3\n\t"
            "vpcmpgtq $tmp,$tmp2,$tmp3\n\t"
            "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t"
            "pshufd $tmp3,$tmp2,0xE\n\t"
            "vpcmpgtq $tmp,$tmp3,$tmp2\n\t"
            "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t"
            "movq $tmp2,$src1\n\t"
            "vpcmpgtq $tmp,$tmp2,$tmp3\n\t"
            "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t"
            "movq $dst,$tmp2\t!
min reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vblendvpd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 1);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 1);
    __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, 1);
    __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, 1);
    __ vblendvpd($tmp3$$XMMRegister,$tmp3$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, 1);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpcmpgtq($tmp$$XMMRegister,$tmp2$$XMMRegister, $tmp3$$XMMRegister, 1);
    __ vblendvpd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, 1);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float Min Reduction

// Min reduction over 2 floats (SSE, no AVX). dst carries the scalar input in
// lane 0 and accumulates the result (only lane 0 of minps is consumed).
instruct rsmin2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE > 0 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "minps $dst, $src2\n\t"
            "pshufd $tmp,$src2,0x1\n\t"
            "minps $dst,$tmp\t! min reduction2F" %}
  ins_encode %{
    __ minps($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ minps($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 2 floats (AVX).
instruct rvmin2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vminps $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x1\n\t"
            "vminps $dst,$dst,$tmp\t! min reduction2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 4 floats (SSE, no AVX).
instruct rsmin4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE > 0 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src2));
  effect(TEMP tmp, TEMP dst);
  // "$" added to the $tmp operands the old format printed as plain "tmp".
  format %{ "minps $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x1\n\t"
            "minps $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x2\n\t"
            "minps $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x3\n\t"
            "minps $dst,$tmp\t!
min reduction4F" %}
  ins_encode %{
    // (unused vector_len local removed -- this rule emits SSE forms only)
    __ minps($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ minps($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ minps($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ minps($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 4 floats (AVX).
instruct rvmin4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src2));
  effect(TEMP tmp, TEMP dst);
  // "$" added to the $tmp operands the old format printed as plain "tmp".
  format %{ "vminps $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x1\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x2\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x3\n\t"
            "vminps $dst,$dst,$tmp\t! min reduction4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 8 floats (AVX, 256-bit).
instruct rvmin8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  // Trailing label corrected from "sub reduction8F".
  format %{ "vminps $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vminps $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vminps $dst,$dst,$tmp\t! min reduction8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 16 floats (AVX-512): fold dst with all four 128-bit
// quarters of src2, lane by lane.
instruct rvmin16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  // Trailing label corrected from "sub reduction16F".
  format %{ "vminps $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vminps $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vminps $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vminps $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vminps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vminps $dst,$dst,$tmp\t! min reduction16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 2 doubles (SSE2, no AVX).
// Predicate fixed: minpd is an SSE2 instruction (UseSSE >= 2), and the
// UseAVX == 0 guard keeps this rule from overlapping rvmin2D, matching the
// other scalar-SSE float reduction rules.
instruct rsmin2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "minpd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "minpd $dst,$tmp\t! min reduction2D" %}
  ins_encode %{
    __ minpd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ minpd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min reduction over 2 doubles (AVX).
instruct rvmin2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vminpd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vminpd $dst,$dst,$tmp\t!
min reduction2D" %} 8697 ins_encode %{ 8698 int vector_len = 0; 8699 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8700 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8701 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8702 %} 8703 ins_pipe( pipe_slow ); 8704 %} 8705 8706 instruct rvmin4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 8707 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 8708 match(Set dst (MinReductionV dst src2)); 8709 effect(TEMP tmp, TEMP dst, TEMP tmp2); 8710 format %{ "vminpd $dst,$dst,$src2\n\t" 8711 "pshufd $tmp,$src2,0xE\n\t" 8712 "vminpd $dst,$dst,$tmp\n\t" 8713 "vextractf32x4 $tmp2,$src2,0x1\n\t" 8714 "vminpd $dst,$dst,$tmp2\n\t" 8715 "pshufd $tmp,$tmp2,0xE\n\t" 8716 "vminpd $dst,$dst,$tmp\t! min reduction4D" %} 8717 ins_encode %{ 8718 int vector_len = 1; 8719 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8720 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8721 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8722 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 8723 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8724 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8725 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8726 %} 8727 ins_pipe( pipe_slow ); 8728 %} 8729 8730 instruct rvmin8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 8731 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 8732 match(Set dst (MinReductionV dst src2)); 8733 effect(TEMP tmp, TEMP dst, TEMP tmp2); 8734 format %{ "vminpd $dst,$dst,$src2\n\t" 8735 "pshufd $tmp,$src2,0xE\n\t" 8736 "vminpd $dst,$dst,$tmp\n\t" 8737 "vextractf32x4 $tmp2,$src2,0x1\n\t" 8738 "vminpd $dst,$dst,$tmp2\n\t" 8739 "pshufd $tmp,$tmp2,0xE\n\t" 
8740 "vminpd $dst,$dst,$tmp\n\t" 8741 "vextractf32x4 $tmp2,$src2,0x2\n\t" 8742 "vminpd $dst,$dst,$tmp2\n\t" 8743 "pshufd $tmp,$tmp2,0xE\n\t" 8744 "vminpd $dst,$dst,$tmp\n\t" 8745 "vextractf32x4 $tmp2,$src2,0x3\n\t" 8746 "vminpd $dst,$dst,$tmp2\n\t" 8747 "pshufd $tmp,$tmp2,0xE\n\t" 8748 "vminpd $dst,$dst,$tmp\t! min reduction8D" %} 8749 ins_encode %{ 8750 int vector_len = 2; 8751 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8752 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8753 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8754 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 8755 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8756 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8757 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8758 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 8759 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8760 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8761 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8762 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 8763 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8764 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8765 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8766 %} 8767 ins_pipe( pipe_slow ); 8768 %} 8769 8770 // ------- Max Reduction ------------ 8771
// SSE4.1 max reduction of 8 bytes seeded with scalar $src1: pmaxsb folds the
// vector, then the top two byte candidates are extracted (pextrb), sign-
// extended, and merged with cmpl/cmovl(greater).
// NOTE(review): the format block disagrees with ins_encode — it says "pminsb"
// where pmaxsb is emitted, "cmovl $tmp3,$src1, 0x0" carries a stray third
// operand, several pextrb lines lack their immediate, and the trailer says
// "min reduction4S" for a max-8B rule. format is PrintOptoAssembly text only,
// but it should be regenerated to match the encode sequence.
8772 instruct rsmax8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8773 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8774 match(Set dst (MaxReductionV src1 src2)); 8775 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8776 format %{ "pshufd $tmp,$src2,0x1\n\t" 8777 "pminsb
$tmp,$src2\n\t" 8778 "pextrb $tmp2,$tmp, 0x1\n\t" 8779 "movsbl $tmp2,$tmp2\n\t" 8780 "pextrb $tmp3,$tmp,0x0\n\t" 8781 "movsbl $tmp3,$tmp3\n\t" 8782 "cmpl $tmp2,$tmp3\n\t" 8783 "cmovl $tmp3,$tmp2\n\t" 8784 "cmpl $src1,$tmp3\n\t" 8785 "cmovl $tmp3,$src1, 0x0\n\t" 8786 "movl $dst,$tmp2\n\t" 8787 "pextrb $tmp2,$tmp\n\t" 8788 "movsbl $tmp2,$tmp2\n\t" 8789 "pextrb $tmp3,$tmp\n\t" 8790 "movsbl $tmp3,$tmp3\n\t" 8791 "cmpl $tmp2,$tmp3\n\t" 8792 "cmovl $tmp3,$tmp2\n\t" 8793 "cmpl $tmp3,$dst\n\t" 8794 "cmovl $dst,$tmp3\t! min reduction4S" %} 8795 ins_encode %{ 8796 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); 8797 __ pmaxsb($tmp$$XMMRegister, $src2$$XMMRegister); 8798 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 8799 __ movsbl($tmp2$$Register, $tmp2$$Register); 8800 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 8801 __ movsbl($tmp3$$Register, $tmp3$$Register); 8802 __ cmpl($tmp2$$Register, $tmp3$$Register); 8803 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8804 __ cmpl($src1$$Register, $tmp3$$Register); 8805 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 8806 __ movl($dst$$Register, $tmp3$$Register); 8807 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 8808 __ movsbl($tmp2$$Register, $tmp2$$Register); 8809 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 8810 __ movsbl($tmp3$$Register, $tmp3$$Register); 8811 __ cmpl($tmp2$$Register, $tmp3$$Register); 8812 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8813 __ cmpl($tmp3$$Register, $dst$$Register); 8814 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 8815 __ movsbl($dst$$Register, $dst$$Register); 8816 %} 8817 ins_pipe( pipe_slow ); 8818 %} 8819
// SSE4.1 max reduction of 16 bytes: fold the high 8 bytes down with
// pshufd(0xE)+pmaxsb first, then reduce as in the 8B rule above.
// NOTE(review): format carries the same stale text as rsmax8B (missing pextrb
// immediates, stray "cmovl $tmp3,$src1, 0x0" operand) and its trailer says
// "max reduction4S" where 16B is meant.
8820 instruct rsmax16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8821 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8822 match(Set dst (MaxReductionV src1 src2)); 8823 effect(TEMP dst, TEMP tmp,
TEMP tmp2, TEMP tmp3, TEMP tmp4); 8824 format %{ "pshufd $tmp4,$src2,0xE\n\t" 8825 "pmaxsb $tmp4,$src2\n\t" 8826 "pshufd $tmp,$tmp4,0x1\n\t" 8827 "pmaxsb $tmp,$tmp4\n\t" 8828 "pextrb $tmp2,$tmp, 0x1\n\t" 8829 "movsbl $tmp2,$tmp2\n\t" 8830 "pextrb $tmp3,$tmp,0x0\n\t" 8831 "movsbl $tmp3,$tmp3\n\t" 8832 "cmpl $tmp2,$tmp3\n\t" 8833 "cmovl $tmp3,$tmp2\n\t" 8834 "cmpl $src1,$tmp3\n\t" 8835 "cmovl $tmp3,$src1, 0x0\n\t" 8836 "movl $dst,$tmp2\n\t" 8837 "pextrb $tmp2,$tmp\n\t" 8838 "movsbl $tmp2,$tmp2\n\t" 8839 "pextrb $tmp3,$tmp\n\t" 8840 "movsbl $tmp3,$tmp3\n\t" 8841 "cmpl $tmp2,$tmp3\n\t" 8842 "cmovl $tmp3,$tmp2\n\t" 8843 "cmpl $tmp3,$dst\n\t" 8844 "cmovl $dst,$tmp3\t! max reduction4S" %} 8845 ins_encode %{ 8846 __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE); 8847 __ pmaxsb($tmp4$$XMMRegister, $src2$$XMMRegister); 8848 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 8849 __ pmaxsb($tmp$$XMMRegister, $tmp4$$XMMRegister); 8850 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 8851 __ movsbl($tmp2$$Register, $tmp2$$Register); 8852 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 8853 __ movsbl($tmp3$$Register, $tmp3$$Register); 8854 __ cmpl($tmp2$$Register, $tmp3$$Register); 8855 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8856 __ cmpl($src1$$Register, $tmp3$$Register); 8857 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 8858 __ movl($dst$$Register, $tmp3$$Register); 8859 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 8860 __ movsbl($tmp2$$Register, $tmp2$$Register); 8861 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 8862 __ movsbl($tmp3$$Register, $tmp3$$Register); 8863 __ cmpl($tmp2$$Register, $tmp3$$Register); 8864 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8865 __ cmpl($tmp3$$Register, $dst$$Register); 8866 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 8867 __ movsbl($dst$$Register, $dst$$Register); 8868 %} 8869 ins_pipe( pipe_slow ); 8870 %} 8871
// AVX (128-bit) max reduction of 16 bytes using three-operand vpmaxsb; same
// scalar merge tail as the SSE rule.
// NOTE(review): in the rule that follows, local vector_len is set but the
// vpmaxsb calls pass literal 0; the format's operand order ("vpmaxsb
// $tmp,$tmp4,$src2") differs from the emitted vpmaxsb(tmp4, tmp4, src2); and
// its trailer says "max reduction4S" for a 16B rule.
8872 instruct
rvmax16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8873 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8874 match(Set dst (MaxReductionV src1 src2)); 8875 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8876 format %{ "pshufd $tmp4,$src2,0xE\n\t" 8877 "vpmaxsb $tmp,$tmp4,$src2\n\t" 8878 "pshufd $tmp,$tmp4,0x1\n\t" 8879 "vpmaxsb $tmp,$tmp,$tmp4\n\t" 8880 "pextrb $tmp2,$tmp, 0x1\n\t" 8881 "movsbl $tmp2,$tmp2\n\t" 8882 "pextrb $tmp3,$tmp,0x0\n\t" 8883 "movsbl $tmp3,$tmp3\n\t" 8884 "cmpl $tmp2,$tmp3\n\t" 8885 "cmovl $tmp3,$tmp2\n\t" 8886 "cmpl $src1,$tmp3\n\t" 8887 "cmovl $tmp3,$src1, 0x0\n\t" 8888 "movl $dst,$tmp2\n\t" 8889 "pextrb $tmp2,$tmp\n\t" 8890 "movsbl $tmp2,$tmp2\n\t" 8891 "pextrb $tmp3,$tmp\n\t" 8892 "movsbl $tmp3,$tmp3\n\t" 8893 "cmpl $tmp2,$tmp3\n\t" 8894 "cmovl $tmp3,$tmp2\n\t" 8895 "cmpl $tmp3,$dst\n\t" 8896 "cmovl $dst,$tmp3\t! max reduction4S" %} 8897 ins_encode %{ 8898 int vector_len = 0; 8899 __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE); 8900 __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 0); 8901 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 8902 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 8903 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 8904 __ movsbl($tmp2$$Register, $tmp2$$Register); 8905 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 8906 __ movsbl($tmp3$$Register, $tmp3$$Register); 8907 __ cmpl($tmp2$$Register, $tmp3$$Register); 8908 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8909 __ cmpl($src1$$Register, $tmp3$$Register); 8910 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 8911 __ movl($dst$$Register, $tmp3$$Register); 8912 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 8913 __ movsbl($tmp2$$Register, $tmp2$$Register); 8914 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 8915 __ movsbl($tmp3$$Register,
$tmp3$$Register); 8916 __ cmpl($tmp2$$Register, $tmp3$$Register); 8917 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8918 __ cmpl($tmp3$$Register, $dst$$Register); 8919 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 8920 __ movsbl($dst$$Register, $dst$$Register); 8921 %} 8922 ins_pipe( pipe_slow ); 8923 %} 8924
// AVX2 max reduction of 32 bytes: vextracti128_high folds the upper half,
// then byte-wise vpmaxsb folds and the scalar cmpl/cmovl merge tail.
// NOTE(review): the trailer comment says "min reduction4S" — wrong direction
// and width; should read max reduction32B. Several format lines also lack
// pextrb immediates and carry the stray "cmovl $tmp3,$src1, 0x0" operand.
8925 instruct rvmax32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8926 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8927 match(Set dst (MaxReductionV src1 src2)); 8928 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8929 format %{ "vextracti128_high $tmp,$src2\n\t" 8930 "vpmaxsb $tmp,$tmp,$src2\n\t" 8931 "pshufd $tmp4,$tmp,0xE\n\t" 8932 "vpmaxsb $tmp4,$tmp4,$tmp\n\t" 8933 "pshufd $tmp,$tmp4,0x1\n\t" 8934 "vpmaxsb $tmp,$tmp,$tmp4\n\t" 8935 "pextrb $tmp2,$tmp, 0x1\n\t" 8936 "movsbl $tmp2,$tmp2\n\t" 8937 "pextrb $tmp3,$tmp,0x0\n\t" 8938 "movsbl $tmp3,$tmp3\n\t" 8939 "cmpl $tmp2,$tmp3\n\t" 8940 "cmovl $tmp3,$tmp2\n\t" 8941 "cmpl $src1,$tmp3\n\t" 8942 "cmovl $tmp3,$src1, 0x0\n\t" 8943 "movl $dst,$tmp2\n\t" 8944 "pextrb $tmp2,$tmp\n\t" 8945 "movsbl $tmp2,$tmp2\n\t" 8946 "pextrb $tmp3,$tmp\n\t" 8947 "movsbl $tmp3,$tmp3\n\t" 8948 "cmpl $tmp2,$tmp3\n\t" 8949 "cmovl $tmp3,$tmp2\n\t" 8950 "cmpl $tmp3,$dst\n\t" 8951 "cmovl $dst,$tmp3\t!
min reduction4S" %} 8952 ins_encode %{ 8953 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 8954 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 8955 __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE); 8956 __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0); 8957 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 8958 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 8959 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 8960 __ movsbl($tmp2$$Register, $tmp2$$Register); 8961 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 8962 __ movsbl($tmp3$$Register, $tmp3$$Register); 8963 __ cmpl($tmp2$$Register, $tmp3$$Register); 8964 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8965 __ cmpl($src1$$Register, $tmp3$$Register); 8966 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 8967 __ movl($dst$$Register, $tmp3$$Register); 8968 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 8969 __ movsbl($tmp2$$Register, $tmp2$$Register); 8970 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 8971 __ movsbl($tmp3$$Register, $tmp3$$Register); 8972 __ cmpl($tmp2$$Register, $tmp3$$Register); 8973 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8974 __ cmpl($tmp3$$Register, $dst$$Register); 8975 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 8976 __ movsbl($dst$$Register, $dst$$Register); 8977 %} 8978 ins_pipe( pipe_slow ); 8979 %} 8980
// AVX-512 max reduction of 64 bytes: fold high 256 bits, then high 128 bits,
// then the usual 8/4/2-byte folds and scalar merge.
// NOTE(review): format lines 8991-8994 repeat a pshufd/vpmaxsb pair against
// $src2 that the encode does not emit (encode goes straight from the 128-bit
// fold to pshufd(tmp,0xE)); and the trailer says "max reduction32B" for a
// 64B rule.
8981 instruct rvmax64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8982 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8983 match(Set dst (MaxReductionV src1 src2)); 8984 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8985 format %{ "vextracti64x4_high $tmp4,$src2\n\t" 8986 "vpmaxsb $tmp4,$tmp4,$src2\n\t" 8987 "vextracti128_high $tmp,$tmp4\n\t" 8988 "vpmaxsb $tmp,$tmp,$tmp4\n\t" 8989 "pshufd
$tmp4,$tmp,0xE\n\t" 8990 "vpmaxsb $tmp,$tmp4,$tmp\n\t" 8991 "pshufd $tmp4,$src2,0xE\n\t" 8992 "vpmaxsb $tmp,$tmp4,$src2\n\t" 8993 "pshufd $tmp,$tmp4,0x1\n\t" 8994 "vpmaxsb $tmp,$tmp,$tmp4\n\t" 8995 "pextrb $tmp2,$tmp, 0x1\n\t" 8996 "movsbl $tmp2,$tmp2\n\t" 8997 "pextrb $tmp3,$tmp,0x0\n\t" 8998 "movsbl $tmp3,$tmp3\n\t" 8999 "cmpl $tmp2,$tmp3\n\t" 9000 "cmovl $tmp3,$tmp2\n\t" 9001 "cmpl $src1,$tmp3\n\t" 9002 "cmovl $tmp3,$src1, 0x0\n\t" 9003 "movl $dst,$tmp2\n\t" 9004 "pextrb $tmp2,$tmp\n\t" 9005 "movsbl $tmp2,$tmp2\n\t" 9006 "pextrb $tmp3,$tmp\n\t" 9007 "movsbl $tmp3,$tmp3\n\t" 9008 "cmpl $tmp2,$tmp3\n\t" 9009 "cmovl $tmp3,$tmp2\n\t" 9010 "cmpl $tmp3,$dst\n\t" 9011 "cmovl $dst,$tmp3\t! max reduction32B" %} 9012 ins_encode %{ 9013 __ vextracti64x4_high($tmp4$$XMMRegister, $src2$$XMMRegister); 9014 __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 1); 9015 __ vextracti128_high($tmp$$XMMRegister, $tmp4$$XMMRegister); 9016 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 9017 __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE); 9018 __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0); 9019 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 9020 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 9021 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 9022 __ movsbl($tmp2$$Register, $tmp2$$Register); 9023 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 9024 __ movsbl($tmp3$$Register, $tmp3$$Register); 9025 __ cmpl($tmp2$$Register, $tmp3$$Register); 9026 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 9027 __ cmpl($src1$$Register, $tmp3$$Register); 9028 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9029 __ movl($dst$$Register, $tmp3$$Register); 9030 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 9031 __ movsbl($tmp2$$Register, $tmp2$$Register); 9032 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 9033 __ movsbl($tmp3$$Register,
$tmp3$$Register); 9034 __ cmpl($tmp2$$Register, $tmp3$$Register); 9035 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 9036 __ cmpl($tmp3$$Register, $dst$$Register); 9037 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 9038 __ movsbl($dst$$Register, $dst$$Register); 9039 %} 9040 ins_pipe( pipe_slow ); 9041 %} 9042
// SSE2 max reduction of 4 shorts seeded with $src1.
// NOTE(review): the whole format block is stale — it says pminsw and
// "min reduction4S" although the encode emits pmaxsw, and it describes a
// movzwl/pminsw-through-$dst sequence while the encode does
// pextrw + movswl + cmpl/cmovl(greater) through $tmp2/$tmp3.
9043 instruct rsmax4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ 9044 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9045 match(Set dst (MaxReductionV src1 src2)); 9046 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 9047 format %{ "pshufd $tmp,$src2,0x1\n\t" 9048 "pminsw $tmp,$src2\n\t" 9049 "movzwl $dst,$src1\n\t" 9050 "pextrw $tmp2,$tmp, 0x0\n\t" 9051 "pminsw $dst,$tmp2\n\t" 9052 "pminsw $dst,$tmp2\n\t" 9053 "movswl $dst,$dst\t! min reduction4S" %} 9054 ins_encode %{ 9055 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); 9056 __ pmaxsw($tmp$$XMMRegister, $src2$$XMMRegister); 9057 __ pextrw($tmp2$$Register, $tmp$$XMMRegister,0x1); 9058 __ movswl($tmp2$$Register, $tmp2$$Register); 9059 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9060 __ movswl($tmp3$$Register, $tmp3$$Register); 9061 __ cmpl($tmp2$$Register, $tmp3$$Register); 9062 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 9063 __ cmpl($src1$$Register, $tmp3$$Register); 9064 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9065 __ movl($dst$$Register, $tmp3$$Register); 9066 %} 9067 ins_pipe( pipe_slow ); 9068 %} 9069
// AVX variant of the 4-short max reduction (three-operand vpmaxsw).
// NOTE(review): format is copied verbatim from the stale rsmax4S text
// (pminsw / "min reduction4S") and matches neither the vpmaxsw nor the
// cmpl/cmovl sequence actually emitted.
9070 instruct rvmax4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ 9071 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9072 match(Set dst (MaxReductionV src1 src2)); 9073 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 9074 format %{ "pshufd $tmp,$src2,0x1\n\t" 9075 "pminsw $tmp,$src2\n\t" 9076 "movzwl $dst,$src1\n\t" 9077 "pextrw $tmp2,$tmp, 0x0\n\t" 9078
"pminsw $dst,$tmp2\n\t" 9079 "pminsw $dst,$tmp2\n\t" 9080 "movswl $dst,$dst\t! min reduction4S" %} 9081 ins_encode %{ 9082 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); 9083 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 9084 __ pextrw($tmp2$$Register, $tmp$$XMMRegister,0x1); 9085 __ movswl($tmp2$$Register, $tmp2$$Register); 9086 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9087 __ movswl($tmp3$$Register, $tmp3$$Register); 9088 __ cmpl($tmp2$$Register, $tmp3$$Register); 9089 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 9090 __ cmpl($src1$$Register, $tmp3$$Register); 9091 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9092 __ movl($dst$$Register, $tmp3$$Register); 9093 %} 9094 ins_pipe( pipe_slow ); 9095 %} 9096
// SSE2 max reduction of 8 shorts: pshufd(0xE)+pmaxsw folds the high 64 bits,
// then the 4S-style pextrw/cmpl/cmovl merge with scalar $src1.
// NOTE(review): format extracts into $tmp2 while the encode uses $tmp4 for
// the extracted word, and "pextrw $tmp2,$tmp" on line 9105 lacks its
// immediate — debug text only, but it should mirror the encode.
9097 instruct rsmax8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 9098 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9099 match(Set dst (MaxReductionV src1 src2)); 9100 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 9101 format %{ "pshufd $tmp2,$src2,0xE\n\t" 9102 "pmaxsw $tmp2,$src2\n\t" 9103 "pshufd $tmp,$tmp2,0x1\n\t" 9104 "pmaxsw $tmp,$tmp2\n\t" 9105 "pextrw $tmp2,$tmp\n\t" 9106 "movswl $tmp2,$tmp2\n\t" 9107 "pextrw $tmp3,$tmp, 0x0\n\t" 9108 "movswl $tmp3,$tmp3\n\t" 9109 "cmpl $tmp2,$tmp3\n\t" 9110 "cmovl $tmp3,$tmp2\n\t" 9111 "cmpl $src1,$tmp3\n\t" 9112 "cmovl $tmp3,$src1\n\t" 9113 "movl $dst,$tmp3\t!
max reduction8S" %} 9114 ins_encode %{ 9115 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister,0xE); 9116 __ pmaxsw($tmp2$$XMMRegister, $src2$$XMMRegister); 9117 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 9118 __ pmaxsw($tmp$$XMMRegister, $tmp2$$XMMRegister); 9119 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 9120 __ movswl($tmp4$$Register, $tmp4$$Register); 9121 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9122 __ movswl($tmp3$$Register, $tmp3$$Register); 9123 __ cmpl($tmp4$$Register, $tmp3$$Register); 9124 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); 9125 __ cmpl($src1$$Register, $tmp3$$Register); 9126 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9127 __ movl($dst$$Register, $tmp3$$Register); 9128 %} 9129 ins_pipe( pipe_slow ); 9130 %} 9131
// AVX 8-short max reduction (three-operand vpmaxsw).
// NOTE(review): format describes a movzwl/vpmaxsw-through-$dst sequence
// (lines 9140-9145) but the encode actually does pextrw + movswl +
// cmpl/cmovl(greater) through $tmp3/$tmp4 — stale debug text.
9132 instruct rvmax8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 9133 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9134 match(Set dst (MaxReductionV src1 src2)); 9135 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 9136 format %{ "pshufd $tmp,$src2,0xE\n\t" 9137 "vpmaxsw $tmp,$tmp,$src2\n\t" 9138 "pshufd $tmp2,$tmp,0x1\n\t" 9139 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9140 "movzwl $dst,$src1\n\t" 9141 "pextrw $tmp3,$tmp, 0x0\n\t" 9142 "vpmaxsw $dst,$dst,$tmp3\n\t" 9143 "pextrw $tmp3,$tmp, 0x1\n\t" 9144 "vpmaxsw $dst,$dst,$tmp3\n\t" 9145 "movswl $dst,$dst\t!
max reduction8S" %} 9146 ins_encode %{ 9147 int vector_len = 0; 9148 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9149 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9150 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 9151 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9152 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 9153 __ movswl($tmp4$$Register, $tmp4$$Register); 9154 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9155 __ movswl($tmp3$$Register, $tmp3$$Register); 9156 __ cmpl($tmp4$$Register, $tmp3$$Register); 9157 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); 9158 __ cmpl($src1$$Register, $tmp3$$Register); 9159 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9160 __ movl($dst$$Register, $tmp3$$Register); 9161 %} 9162 ins_pipe( pipe_slow ); 9163 %} 9164
// AVX2 16-short max reduction: fold the high 128 bits, then the 0xE and 0x1
// shuffle folds, then the scalar merge tail.
// NOTE(review): format line 9179 "cmpl $tmp2$tmp3" is missing its comma.
9165 instruct rvmax16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 9166 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9167 match(Set dst (MaxReductionV src1 src2)); 9168 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 9169 format %{ "vextracti128_high $tmp,$src2\n\t" 9170 "vpmaxsw $tmp,$tmp,$src2\n\t" 9171 "pshufd $tmp2,$tmp,0xE\n\t" 9172 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9173 "pshufd $tmp2,$tmp,0x1\n\t" 9174 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9175 "pextrw $tmp2,$tmp, 0x1\n\t" 9176 "movswl $tmp2,$tmp2\n\t" 9177 "pextrw $tmp3,$tmp, 0x0\n\t" 9178 "movswl $tmp3,$tmp3\n\t" 9179 "cmpl $tmp2$tmp3\n\t" 9180 "cmovl $tmp3,$tmp2\n\t" 9181 "cmpl $src1,$tmp3\n\t" 9182 "cmovl $tmp3,$src1\n\t" 9183 "movl $dst,$tmp3\t!
max reduction16S" %} 9184 ins_encode %{ 9185 int vector_len = 1; 9186 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 9187 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9188 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 9189 __ vpmaxsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9190 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 9191 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9192 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 9193 __ movswl($tmp4$$Register, $tmp4$$Register); 9194 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9195 __ movswl($tmp3$$Register, $tmp3$$Register); 9196 __ cmpl($tmp4$$Register, $tmp3$$Register); 9197 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); 9198 __ cmpl($src1$$Register, $tmp3$$Register); 9199 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9200 __ movl($dst$$Register, $tmp3$$Register); 9201 %} 9202 ins_pipe( pipe_slow ); 9203 %} 9204
// AVX-512 32-short max reduction: fold high 256 then high 128, then the
// shuffle folds and scalar merge.
// NOTE(review): format repeats the "pextrw $tmp3,$tmp, 0x0 / movswl
// $dst,$src1" pair (9217-9220), has "cmpl $tmp2$tmp3" with a missing comma,
// and ends "movl $dst,$dst" though the encode moves $tmp3 into $dst.
9205 instruct rvmax32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 9206 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9207 match(Set dst (MaxReductionV src1 src2)); 9208 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 9209 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 9210 "vpmaxsw $tmp2,$tmp2,$src2\n\t" 9211 "vextracti128_high $tmp,$tmp2\n\t" 9212 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9213 "pshufd $tmp2,$tmp,0xE\n\t" 9214 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9215 "pshufd $tmp2,$tmp,0x1\n\t" 9216 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9217 "pextrw $tmp3,$tmp, 0x0\n\t" 9218 "movswl $dst,$src1\n\t" 9219 "pextrw $tmp3,$tmp, 0x0\n\t" 9220 "movswl $dst,$src1\n\t" 9221 "cmpl $tmp2$tmp3\n\t" 9222 "cmovl $tmp3,$tmp2\n\t" 9223 "cmpl $src1,$tmp3\n\t" 9224 "cmovl $tmp3,$src1\n\t" 9225 "movl $dst,$dst\t!
max reduction32S" %} 9226 ins_encode %{ 9227 int vector_len = 2; 9228 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 9229 __ vpmaxsw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 9230 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 9231 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9232 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 9233 __ vpmaxsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9234 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 9235 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9236 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 9237 __ movswl($tmp4$$Register, $tmp4$$Register); 9238 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9239 __ movswl($tmp3$$Register, $tmp3$$Register); 9240 __ cmpl($tmp4$$Register, $tmp3$$Register); 9241 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); 9242 __ cmpl($src1$$Register, $tmp3$$Register); 9243 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9244 __ movl($dst$$Register, $tmp3$$Register); 9245 %} 9246 ins_pipe( pipe_slow ); 9247 %} 9248
// SSE4.1 max reduction of 2 ints seeded with $src1: pmaxsd the shuffled lane,
// merge the scalar via movdl + pmaxsd, extract result.
9249 instruct rsmax2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 9250 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9251 match(Set dst (MaxReductionV src1 src2)); 9252 effect(TEMP tmp, TEMP tmp2); 9253 format %{ "pshufd $tmp,$src2,0x1\n\t" 9254 "pmaxsd $tmp,$src2\n\t" 9255 "movd $tmp2,$src1\n\t" 9256 "pmaxsd $tmp2,$tmp\n\t" 9257 "movd $dst,$tmp2\t!
max reduction2I" %} 9258 ins_encode %{ 9259 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9260 __ pmaxsd($tmp$$XMMRegister, $src2$$XMMRegister); 9261 __ movdl($tmp2$$XMMRegister, $src1$$Register); 9262 __ pmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister); 9263 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9264 %} 9265 ins_pipe( pipe_slow ); 9266 %} 9267
// AVX variant of the 2-int max reduction using three-operand vpmaxsd.
9268 instruct rvmax2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 9269 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9270 match(Set dst (MaxReductionV src1 src2)); 9271 effect(TEMP tmp, TEMP tmp2); 9272 format %{ "pshufd $tmp,$src2,0x1\n\t" 9273 "vpmaxsd $tmp2,$tmp,$src2\n\t" 9274 "movd $tmp,$src1\n\t" 9275 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9276 "movd $dst,$tmp2\t! max reduction2I" %} 9277 ins_encode %{ 9278 int vector_len = 0; 9279 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9280 __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9281 __ movdl($tmp$$XMMRegister, $src1$$Register); 9282 __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9283 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9284 %} 9285 ins_pipe( pipe_slow ); 9286 %} 9287
// SSE4.1 max reduction of 4 ints: fold high 64 bits (pshufd 0xE), then the
// odd lane (0x1), then merge scalar $src1 via movdl + pmaxsd.
9288 instruct rsmax4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 9289 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9290 match(Set dst (MaxReductionV src1 src2)); 9291 effect(TEMP tmp, TEMP tmp2); 9292 format %{ "pshufd $tmp,$src2,0xE\n\t" 9293 "pmaxsd $tmp,$src2\n\t" 9294 "pshufd $tmp2,$tmp,0x1\n\t" 9295 "pmaxsd $tmp2,$tmp\n\t" 9296 "movd $tmp,$src1\n\t" 9297 "pmaxsd $tmp2,$tmp\n\t" 9298 "movd $dst,$tmp2\t!
max reduction4I" %} 9299 ins_encode %{ 9300 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9301 __ pmaxsd($tmp$$XMMRegister, $src2$$XMMRegister); 9302 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 9303 __ pmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister); 9304 __ movdl($tmp$$XMMRegister, $src1$$Register); 9305 __ pmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister); 9306 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9307 %} 9308 ins_pipe( pipe_slow ); 9309 %} 9310
// AVX variant of the 4-int max reduction (three-operand vpmaxsd).
9311 instruct rvmax4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 9312 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9313 match(Set dst (MaxReductionV src1 src2)); 9314 effect(TEMP tmp, TEMP tmp2); 9315 format %{ "pshufd $tmp,$src2,0xE\n\t" 9316 "vpmaxsd $tmp2,$tmp,$src2\n\t" 9317 "pshufd $tmp,$tmp2,0x1\n\t" 9318 "vpmaxsd $tmp2,$tmp2,$tmp\n\t" 9319 "movd $tmp,$src1\n\t" 9320 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9321 "movd $dst,$tmp2\t! max reduction4I" %} 9322 ins_encode %{ 9323 int vector_len = 0; 9324 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9325 __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9326 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 9327 __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9328 __ movdl($tmp$$XMMRegister, $src1$$Register); 9329 __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9330 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9331 %} 9332 ins_pipe( pipe_slow ); 9333 %} 9334
// EVEX-predicate copy of rvmax4I_reduction_reg (UseAVX > 2) with an
// otherwise identical body.
// NOTE(review): when UseAVX > 2 both this predicate and rvmax4I_reduction_reg's
// (UseAVX > 0) are true — presumably ADLC rule order picks this one; confirm.
9335 instruct rvmax4I_reduction_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 9336 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9337 match(Set dst (MaxReductionV src1 src2)); 9338 effect(TEMP tmp, TEMP tmp2); 9339 format %{ "pshufd $tmp,$src2,0xE\n\t" 9340 "vpmaxsd $tmp2,$tmp,$src2\n\t" 9341 "pshufd $tmp,$tmp2,0x1\n\t" 9342 "vpmaxsd $tmp2,$tmp2,$tmp\n\t"
9343 "movd $tmp,$src1\n\t" 9344 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9345 "movd $dst,$tmp2\t! max reduction4I" %} 9346 ins_encode %{ 9347 int vector_len = 0; 9348 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9349 __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9350 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 9351 __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9352 __ movdl($tmp$$XMMRegister, $src1$$Register); 9353 __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9354 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9355 %} 9356 ins_pipe( pipe_slow ); 9357 %} 9358
// AVX2 8-int max reduction: vextracti128_high folds the upper half, then the
// 0xE/0x1 shuffle folds and scalar merge via movdl + vpmaxsd.
9359 instruct rvmax8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 9360 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9361 match(Set dst (MaxReductionV src1 src2)); 9362 effect(TEMP tmp, TEMP tmp2); 9363 format %{ "vextracti128_high $tmp,$src2\n\t" 9364 "vpmaxsd $tmp,$tmp,$src2\n\t" 9365 "pshufd $tmp2,$tmp,0xE\n\t" 9366 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9367 "pshufd $tmp,$tmp2,0x1\n\t" 9368 "vpmaxsd $tmp2,$tmp2,$tmp\n\t" 9369 "movd $tmp,$src1\n\t" 9370 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9371 "movd $dst,$tmp2\t!
max reduction8I" %} 9372 ins_encode %{ 9373 int vector_len = 1; 9374 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 9375 __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9376 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 9377 __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9378 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 9379 __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9380 __ movdl($tmp$$XMMRegister, $src1$$Register); 9381 __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9382 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9383 %} 9384 ins_pipe( pipe_slow ); 9385 %} 9386
// EVEX-predicate (UseAVX > 2) 8-int max reduction; the definition continues
// past this chunk. NOTE(review): same predicate overlap with
// rvmax8I_reduction_reg (UseAVX > 1) as the 4I pair above — confirm rule order.
9387 instruct rvmax8I_reduction_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 9388 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9389 match(Set dst (MaxReductionV src1 src2)); 9390 effect(TEMP tmp, TEMP tmp2); 9391 format %{ "vextracti128_high $tmp,$src2\n\t" 9392 "vpmaxsd $tmp,$tmp,$src2\n\t" 9393 "pshufd $tmp2,$tmp,0xE\n\t" 9394 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9395 "pshufd $tmp,$tmp2,0x1\n\t" 9396 "vpmaxsd $tmp2,$tmp2,$tmp\n\t" 9397 "movd $tmp,$src1\n\t" 9398 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9399 "movd $dst,$tmp2\t!
max reduction8I" %} 9400 ins_encode %{ 9401 int vector_len = 1; 9402 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 9403 __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9404 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 9405 __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9406 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 9407 __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9408 __ movdl($tmp$$XMMRegister, $src1$$Register); 9409 __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9410 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9411 %} 9412 ins_pipe( pipe_slow ); 9413 %} 9414 9415 instruct rvmax16I_reduction_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 9416 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9417 match(Set dst (MaxReductionV src1 src2)); 9418 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 9419 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 9420 "vpmaxsd $tmp3,$tmp3,$src2\n\t" 9421 "vextracti128_high $tmp,$tmp3\n\t" 9422 "vpmaxsd $tmp,$tmp,$tmp3\n\t" 9423 "pshufd $tmp2,$tmp,0xE\n\t" 9424 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9425 "pshufd $tmp,$tmp2,0x1\n\t" 9426 "vpmaxsd $tmp2,$tmp2,$tmp\n\t" 9427 "movd $tmp,$src1\n\t" 9428 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9429 "movd $dst,$tmp2\t! 
max reduction16I" %} 9430 ins_encode %{ 9431 int vector_len = 2; 9432 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 9433 __ vpmaxsd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); 9434 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 9435 __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); 9436 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 9437 __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9438 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 9439 __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9440 __ movdl($tmp$$XMMRegister, $src1$$Register); 9441 __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9442 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9443 %} 9444 ins_pipe( pipe_slow ); 9445 %} 9446 9447 // Long Max Reduction 9448 instruct rsmax1L_reduction_reg(rRegL dst, rRegL src1, vecD src2, rxmm0 xmm_0, regF tmp2, regF tmp3) %{ 9449 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 9450 match(Set dst (MaxReductionV src1 src2)); 9451 effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3); 9452 format %{ "movdq $xmm_0,$src1\n\t" 9453 "movdq $tmp2,$src1\n\t" 9454 "pcmpgtq $xmm_0,$src2\n\t" 9455 "blendvpd $tmp2,$src2\n\t" 9456 "movdq $dst,$tmp2\t! 
max reduction1L" %} 9457 ins_encode %{ 9458 __ movdq($xmm_0$$XMMRegister,$src1$$Register); 9459 __ movdq($tmp2$$XMMRegister,$src1$$Register); 9460 __ movdq($tmp3$$XMMRegister,$src2$$Register); 9461 __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister); 9462 __ blendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister); 9463 __ movdq($dst$$Register, $tmp3$$XMMRegister); 9464 %} 9465 ins_pipe( pipe_slow ); 9466 %} 9467 9468 instruct rsmax2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, rxmm0 xmm_0, regF tmp2, regF tmp3) %{ 9469 predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 9470 match(Set dst (MaxReductionV src1 src2)); 9471 effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3); 9472 format %{ "pshufd $tmp3,$src2,0xE\n\t" 9473 "movdqu $xmm_0,$src2\n\t" 9474 "pcmpgtq $xmm_0,$tmp3\n\t" 9475 "blendvpd $tmp3,$src2\n\t" 9476 "movdqu $xmm_0,$tmp3\n\t" 9477 "movdq $tmp2,$src1\n\t" 9478 "pcmpgtq $xmm_0,$tmp2\n\t" 9479 "blendvpd $tmp2,$tmp3\n\t" 9480 "movq $dst,$tmp2\t! 
max reduction2L" %} 9481 ins_encode %{ 9482 __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 0xE); 9483 __ movdqu($xmm_0$$XMMRegister, $src2$$XMMRegister); 9484 __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister); 9485 __ blendvpd($tmp3$$XMMRegister, $src2$$XMMRegister); 9486 __ movdqu($xmm_0$$XMMRegister, $tmp3$$XMMRegister); 9487 __ movdq($tmp2$$XMMRegister, $src1$$Register); 9488 __ pcmpgtq($xmm_0$$XMMRegister, $tmp2$$XMMRegister); 9489 __ blendvpd($tmp2$$XMMRegister,$tmp3$$XMMRegister); 9490 __ movdq($dst$$Register, $tmp2$$XMMRegister); 9491 %} 9492 ins_pipe( pipe_slow ); 9493 %} 9494 9495 instruct rvmax2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2, regF tmp3) %{ 9496 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 9497 match(Set dst (MaxReductionV src1 src2)); 9498 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 9499 format %{ "pshufd $tmp2,$src2,0xE\n\t" 9500 "vpcmpgtq $tmp,$tmp2,$src2\n\t" 9501 "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t" 9502 "movq $tmp,$src1\n\t" 9503 "vpcmpgtq $tmp3,$tmp2,$tmp\n\t" 9504 "blendvpd $tmp2,$tmp2,$src1,$tmp3\n\t" 9505 "movq $dst,$tmp2\t! 
max reduction2L" %} 9506 ins_encode %{ 9507 int vector_len = 0; 9508 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 9509 __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 9510 __ vblendvpd($tmp2$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9511 __ movdq($tmp$$XMMRegister,$src1$$Register); 9512 __ vpcmpgtq($tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9513 __ vblendvpd($tmp2$$XMMRegister, $tmp$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, vector_len); 9514 __ movdq($dst$$Register, $tmp2$$XMMRegister); 9515 %} 9516 ins_pipe( pipe_slow ); 9517 %} 9518 9519 instruct rvmax4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 9520 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 9521 match(Set dst (MaxReductionV src1 src2)); 9522 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 9523 format %{ "vextracti128_high $tmp2,$src2\n\t" 9524 "vpcmpgtq $tmp,$tmp2,$src2\n\t" 9525 "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t" 9526 "vpshufd $tmp3, $tmp2,0x1\n\t" 9527 "vpcmpgtq $tmp, $tmp3,$tmp\n\t2" 9528 "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t" 9529 "movq $tmp2,$src1\n\t" 9530 "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" 9531 "blendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" 9532 "movq $dst,$tmp2\t! 
max reduction2L" %} 9533 ins_encode %{ 9534 int vector_len = 1; 9535 __ vextracti128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 9536 __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 9537 __ vblendvpd($tmp2$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9538 __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE); 9539 __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9540 __ vblendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); 9541 __ movdq($tmp$$XMMRegister,$src1$$Register); 9542 __ vpcmpgtq($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); 9543 __ vblendvpd($tmp2$$XMMRegister, $tmp$$XMMRegister,$tmp3$$XMMRegister,$tmp2$$XMMRegister, vector_len); 9544 __ movdq($dst$$Register, $tmp2$$XMMRegister); 9545 %} 9546 ins_pipe( pipe_slow ); 9547 %} 9548 9549 instruct rvmax8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 9550 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 9551 match(Set dst (MaxReductionV src1 src2)); 9552 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 9553 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 9554 "vpcmpgtq $tmp,$tmp3,$src2\n\t" 9555 "vblendvpd $tmp3,$tmp3,$src2,$tmp\n\t" 9556 "vextracti128_high $tmp2,$tmp3\n\t" 9557 "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" 9558 "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" 9559 "vpshufd $tmp3,$tmp2,0x1\n\t" 9560 "vpcmpgtq $tmp,$tmp3,$tmp2\n\t" 9561 "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t" 9562 "movq $tmp2,$src1\n\t" 9563 "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" 9564 "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" 9565 "movq $dst,$tmp2\t! 
max reduction4I" %} 9566 ins_encode %{ 9567 int vector_len = 1; 9568 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 9569 __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); 9570 __ vblendvpd($tmp3$$XMMRegister, $src2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); 9571 __ vextracti128_high($tmp2$$XMMRegister, $tmp3$$XMMRegister); 9572 __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 9573 __ vblendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9574 __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE); 9575 __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9576 __ vblendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); 9577 __ movdq($tmp2$$XMMRegister, $src1$$Register); 9578 __ vpcmpgtq($tmp$$XMMRegister,$tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 9579 __ vblendvpd($tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 9580 __ movdq($dst$$Register, $tmp2$$XMMRegister); 9581 %} 9582 ins_pipe( pipe_slow ); 9583 %} 9584 9585 // Float max Reduction 9586 instruct rsmax2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 9587 predicate(UseSSE > 0 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 9588 match(Set dst (MaxReductionV dst src2)); 9589 effect(TEMP dst, TEMP tmp); 9590 format %{ "maxps $dst, $src2\n\t" 9591 "pshufd $tmp,$src2,0x1\n\t" 9592 "maxps $dst,$tmp\t! 
max reduction2F" %} 9593 ins_encode %{ 9594 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 9595 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9596 __ maxps($dst$$XMMRegister, $tmp$$XMMRegister); 9597 %} 9598 ins_pipe( pipe_slow ); 9599 %} 9600 9601 instruct rvmax2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 9602 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 9603 match(Set dst (MaxReductionV dst src2)); 9604 effect(TEMP dst, TEMP tmp); 9605 format %{ "vmaxps $dst,$dst,$src2\n\t" 9606 "pshufd $tmp,$src2,0x1\n\t" 9607 "vmaxps $dst,$dst,$tmp\t! max reduction2F" %} 9608 ins_encode %{ 9609 int vector_len = 0; 9610 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 9611 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9612 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9613 %} 9614 ins_pipe( pipe_slow ); 9615 %} 9616 9617 instruct rsmax4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 9618 predicate(UseSSE > 0 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 9619 match(Set dst (MaxReductionV dst src2)); 9620 effect(TEMP tmp, TEMP dst); 9621 format %{ "maxps $dst,$src2\n\t" 9622 "pshufd $tmp,$src2,0x1\n\t" 9623 "maxps $dst,tmp\n\t" 9624 "pshufd $tmp,$src2,0x2\n\t" 9625 "maxps $dst,tmp\n\t" 9626 "pshufd $tmp,$src2,0x3\n\t" 9627 "maxps $dst,$tmp\t! 
max reduction4F" %} 9628 ins_encode %{ 9629 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 9630 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9631 __ maxps($dst$$XMMRegister, $tmp$$XMMRegister); 9632 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x2); 9633 __ maxps($dst$$XMMRegister, $tmp$$XMMRegister); 9634 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x3); 9635 __ maxps($dst$$XMMRegister, $tmp$$XMMRegister); 9636 %} 9637 ins_pipe( pipe_slow ); 9638 %} 9639 9640 instruct rvmax4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 9641 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 9642 match(Set dst (MaxReductionV dst src2)); 9643 effect(TEMP tmp, TEMP dst); 9644 format %{ "vmaxps $dst,$dst,$src2\n\t" 9645 "pshufd $tmp,$src2,0x1\n\t" 9646 "vmaxps $dst,$dst,tmp\n\t" 9647 "pshufd $tmp,$src2,0x2\n\t" 9648 "vmaxps $dst,$dst,tmp\n\t" 9649 "pshufd $tmp,$src2,0x3\n\t" 9650 "vmaxps $dst,$dst,$tmp\t! max reduction4F" %} 9651 ins_encode %{ 9652 int vector_len = 0; 9653 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 9654 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9655 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9656 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x2); 9657 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9658 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x3); 9659 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9660 %} 9661 ins_pipe( pipe_slow ); 9662 %} 9663 9664 instruct rvmax8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 9665 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 9666 match(Set dst (MaxReductionV dst src2)); 9667 effect(TEMP tmp, TEMP dst, TEMP tmp2); 9668 format %{ "vmaxps $dst,$dst,$src2\n\t" 9669 "pshufd $tmp,$src2,0x01\n\t" 9670 "vmaxps $dst,$dst,$tmp\n\t" 9671 "pshufd 
$tmp,$src2,0x02\n\t" 9672 "vmaxps $dst,$dst,$tmp\n\t" 9673 "pshufd $tmp,$src2,0x03\n\t" 9674 "vmaxps $dst,$dst,$tmp\n\t" 9675 "vextractf128_high $tmp2,$src2\n\t" 9676 "vmaxps $dst,$dst,$tmp2\n\t" 9677 "pshufd $tmp,$tmp2,0x01\n\t" 9678 "vmaxps $dst,$dst,$tmp\n\t" 9679 "pshufd $tmp,$tmp2,0x02\n\t" 9680 "vmaxps $dst,$dst,$tmp\n\t" 9681 "pshufd $tmp,$tmp2,0x03\n\t" 9682 "vmaxps $dst,$dst,$tmp\t! sub reduction8F" %} 9683 ins_encode %{ 9684 int vector_len = 1; 9685 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 9686 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 9687 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9688 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 9689 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9690 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 9691 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9692 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 9693 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9694 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 9695 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9696 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 9697 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9698 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 9699 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9700 %} 9701 ins_pipe( pipe_slow ); 9702 %} 9703 9704 instruct rvmax16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 9705 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 9706 match(Set dst (MaxReductionV dst src2)); 9707 effect(TEMP tmp, TEMP dst, TEMP tmp2); 9708 format %{ "vmaxps $dst,$dst,$src2\n\t" 9709 "pshufd $tmp,$src2,0x01\n\t" 9710 "vmaxps 
$dst,$dst,$tmp\n\t" 9711 "pshufd $tmp,$src2,0x02\n\t" 9712 "vmaxps $dst,$dst,$tmp\n\t" 9713 "pshufd $tmp,$src2,0x03\n\t" 9714 "vmaxps $dst,$dst,$tmp\n\t" 9715 "vextractf32x4 $tmp2,$src2,0x1\n\t" 9716 "vmaxps $dst,$dst,$tmp2\n\t" 9717 "pshufd $tmp,$tmp2,0x01\n\t" 9718 "vmaxps $dst,$dst,$tmp\n\t" 9719 "pshufd $tmp,$tmp2,0x02\n\t" 9720 "vmaxps $dst,$dst,$tmp\n\t" 9721 "pshufd $tmp,$tmp2,0x03\n\t" 9722 "vmaxps $dst,$dst,$tmp\n\t" 9723 "vextractf32x4 $tmp2,$src2,0x2\n\t" 9724 "vmaxps $dst,$dst,$tmp2\n\t" 9725 "pshufd $tmp,$tmp2,0x01\n\t" 9726 "vmaxps $dst,$dst,$tmp\n\t" 9727 "pshufd $tmp,$tmp2,0x02\n\t" 9728 "vmaxps $dst,$dst,$tmp\n\t" 9729 "pshufd $tmp,$tmp2,0x03\n\t" 9730 "vmaxps $dst,$dst,$tmp\n\t" 9731 "vextractf32x4 $tmp2,$src2,0x3\n\t" 9732 "vmaxps $dst,$dst,$tmp2\n\t" 9733 "pshufd $tmp,$tmp2,0x01\n\t" 9734 "vmaxps $dst,$dst,$tmp\n\t" 9735 "pshufd $tmp,$tmp2,0x02\n\t" 9736 "vmaxps $dst,$dst,$tmp\n\t" 9737 "pshufd $tmp,$tmp2,0x03\n\t" 9738 "vmaxps $dst,$dst,$tmp\t! sub reduction16F" %} 9739 ins_encode %{ 9740 int vector_len = 2; 9741 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 9742 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 9743 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9744 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 9745 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9746 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 9747 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9748 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 9749 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9750 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 9751 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9752 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 9753 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, 
$tmp$$XMMRegister, vector_len); 9754 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 9755 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9756 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 9757 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9758 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 9759 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9760 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 9761 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9762 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 9763 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9764 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 9765 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9766 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 9767 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9768 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 9769 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9770 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 9771 __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9772 %} 9773 ins_pipe( pipe_slow ); 9774 %} 9775 9776 instruct rsmax2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 9777 predicate(UseSSE >= 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 9778 match(Set dst (MaxReductionV dst src2)); 9779 effect(TEMP tmp, TEMP dst); 9780 format %{ "maxpd $dst,$src2\n\t" 9781 "pshufd $tmp,$src2,0xE\n\t" 9782 "maxpd $dst,$tmp\t! 
max reduction2D" %} 9783 ins_encode %{ 9784 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 9785 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9786 __ maxpd($dst$$XMMRegister, $tmp$$XMMRegister); 9787 %} 9788 ins_pipe( pipe_slow ); 9789 %} 9790 9791 instruct rvmax2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 9792 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 9793 match(Set dst (MaxReductionV dst src2)); 9794 effect(TEMP tmp, TEMP dst); 9795 format %{ "vmaxpd $dst,$dst,$src2\n\t" 9796 "pshufd $tmp,$src2,0xE\n\t" 9797 "vmaxpd $dst,$dst,$tmp\t! max reduction2D" %} 9798 ins_encode %{ 9799 int vector_len = 0; 9800 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 9801 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9802 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9803 %} 9804 ins_pipe( pipe_slow ); 9805 %} 9806 9807 instruct rvmax4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 9808 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 9809 match(Set dst (MaxReductionV dst src2)); 9810 effect(TEMP tmp, TEMP dst, TEMP tmp2); 9811 format %{ "vmaxpd $dst,$dst,$src2\n\t" 9812 "pshufd $tmp,$src2,0xE\n\t" 9813 "vmaxpd $dst,$dst,$tmp\n\t" 9814 "vextractf32x4 $tmp2,$src2,0x1\n\t" 9815 "vmaxpd $dst,$dst,$tmp2\n\t" 9816 "pshufd $tmp,$tmp2,0xE\n\t" 9817 "vmaxpd $dst,$dst,$tmp\t! 
max reduction4D" %} 9818 ins_encode %{ 9819 int vector_len = 1; 9820 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 9821 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9822 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9823 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 9824 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9825 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 9826 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9827 %} 9828 ins_pipe( pipe_slow ); 9829 %} 9830 9831 instruct rvmax8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 9832 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 9833 match(Set dst (MaxReductionV dst src2)); 9834 effect(TEMP tmp, TEMP dst, TEMP tmp2); 9835 format %{ "vmaxpd $dst,$dst,$src2\n\t" 9836 "pshufd $tmp,$src2,0xE\n\t" 9837 "vmaxpd $dst,$dst,$tmp\n\t" 9838 "vextractf32x4 $tmp2,$src2,0x1\n\t" 9839 "vmaxpd $dst,$dst,$tmp2\n\t" 9840 "pshufd $tmp,$tmp2,0xE\n\t" 9841 "vmaxpd $dst,$dst,$tmp\n\t" 9842 "vextractf32x4 $tmp2,$src2,0x2\n\t" 9843 "vmaxpd $dst,$dst,$tmp2\n\t" 9844 "pshufd $tmp,$tmp2,0xE\n\t" 9845 "vmaxpd $dst,$dst,$tmp\n\t" 9846 "vextractf32x4 $tmp2,$src2,0x3\n\t" 9847 "vmaxpd $dst,$dst,$tmp2\n\t" 9848 "pshufd $tmp,$tmp2,0xE\n\t" 9849 "vmaxpd $dst,$dst,$tmp\t! 
max reduction8D" %} 9850 ins_encode %{ 9851 int vector_len = 2; 9852 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 9853 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9854 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9855 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 9856 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9857 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 9858 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9859 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 9860 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9861 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 9862 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9863 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 9864 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9865 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 9866 __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 9867 %} 9868 ins_pipe( pipe_slow ); 9869 %} 9870 9871 instruct rsand8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 9872 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 9873 match(Set dst (AndReductionV src1 src2)); 9874 effect(TEMP tmp, TEMP tmp2, TEMP dst); 9875 format %{ 9876 "pshufd $tmp,$src2,0x1\n\t" 9877 "pand $tmp,$src2\n\t" 9878 "movzbl $dst,$src1\n\t" 9879 "pextrb $tmp2,$tmp, 0x0\n\t" 9880 "andl $dst,$tmp2\n\t" 9881 "pextrb $tmp2,$tmp, 0x1\n\t" 9882 "andl $dst,$tmp2\n\t" 9883 "pextrb $tmp2,$tmp, 0x2\n\t" 9884 "andl $dst,$tmp2\n\t" 9885 "pextrb $tmp2,$tmp, 0x3\n\t" 9886 "andl $dst,$tmp2\n\t" 9887 "movsbl $dst,$dst\t! 
and reduction8B" %} 9888 ins_encode %{ 9889 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9890 __ pand($tmp$$XMMRegister, $src2$$XMMRegister); 9891 __ movzbl($dst$$Register, $src1$$Register); 9892 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); 9893 __ andl($dst$$Register, $tmp2$$Register); 9894 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); 9895 __ andl($dst$$Register, $tmp2$$Register); 9896 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); 9897 __ andl($dst$$Register, $tmp2$$Register); 9898 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); 9899 __ andl($dst$$Register, $tmp2$$Register); 9900 __ movsbl($dst$$Register, $dst$$Register); 9901 %} 9902 ins_pipe( pipe_slow ); 9903 %} 9904 9905 instruct rsand16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ 9906 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 9907 match(Set dst (AndReductionV src1 src2)); 9908 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 9909 format %{ "pshufd $tmp,$src2,0xE\n\t" 9910 "pand $tmp,$src2\n\t" 9911 "pshufd $tmp2,$tmp,0x1\n\t" 9912 "pand $tmp,$tmp,$tmp2\n\t" 9913 "movzbl $dst,$src1\n\t" 9914 "pextrb $tmp3,$tmp, 0x0\n\t" 9915 "andl $dst,$tmp3\n\t" 9916 "pextrb $tmp3,$tmp, 0x1\n\t" 9917 "andl $dst,$tmp3\n\t" 9918 "pextrb $tmp3,$tmp, 0x2\n\t" 9919 "andl $dst,$tmp3\n\t" 9920 "pextrb $tmp3,$tmp, 0x3\n\t" 9921 "andl $dst,$tmp3\n\t" 9922 "movsbl $dst,$dst\t! 
and reduction16B" %} 9923 ins_encode %{ 9924 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9925 __ pand($tmp$$XMMRegister, $src2$$XMMRegister); 9926 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 9927 __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); 9928 __ movzbl($dst$$Register, $src1$$Register); 9929 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 9930 __ andl($dst$$Register, $tmp3$$Register); 9931 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 9932 __ andl($dst$$Register, $tmp3$$Register); 9933 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 9934 __ andl($dst$$Register, $tmp3$$Register); 9935 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 9936 __ andl($dst$$Register, $tmp3$$Register); 9937 __ movsbl($dst$$Register, $dst$$Register); 9938 %} 9939 ins_pipe( pipe_slow ); 9940 %} 9941 9942 instruct rvand32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 9943 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 9944 match(Set dst (AndReductionV src1 src2)); 9945 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 9946 format %{ "vextracti128_high $tmp,$src2\n\t" 9947 "vpand $tmp,$tmp,$src2\n\t" 9948 "pshufd $tmp2,$tmp,0xE\n\t" 9949 "vpand $tmp,$tmp,$tmp2\n\t" 9950 "pshufd $tmp2,$tmp,0x1\n\t" 9951 "vpand $tmp,$tmp,$tmp2\n\t" 9952 "movzbl $dst,$src1\n\t" 9953 "pextrb $tmp3,$tmp, 0x0\n\t" 9954 "andl $dst,$tmp3\n\t" 9955 "pextrb $tmp3,$tmp, 0x1\n\t" 9956 "andl $dst,$tmp3\n\t" 9957 "pextrb $tmp3,$tmp, 0x2\n\t" 9958 "andl $dst,$tmp3\n\t" 9959 "pextrb $tmp3,$tmp, 0x3\n\t" 9960 "andl $dst,$tmp3\n\t" 9961 "movsbl $dst,$dst\t! 
              and reduction32B" %}
  // AVX 256-bit byte AND reduction, tail: fold the high 128-bit lane onto the
  // low lane, halve twice more via shuffles, then AND the surviving four bytes
  // into the scalar accumulator one byte at a time with pextrb.
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    // Start the scalar combine from src1 (zero-extended so byte ops are clean).
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3);
    __ andl($dst$$Register, $tmp3$$Register);
    // Sign-extend the final byte result back into the 32-bit destination.
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 512-bit byte vector into a scalar byte (EVEX, UseAVX > 2).
// Folds 512->256->128->64->32 bits with vector ANDs, then combines the last
// four bytes through a GPR using shifts (no pextrb needed for the tail here).
instruct rvand64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpand $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "andl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "andl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "andl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "andl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! and reduction64B" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    // First fold is a 256-bit AND: literal vector length 1 (256-bit), not
    // vector_len (0 == 128-bit) which is used for the narrower folds below.
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    // Move the surviving 4 bytes to a GPR and combine byte-by-byte via shifts.
    __ movdl($tmp3$$Register, $tmp$$XMMRegister);
    __ andl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 64-bit vector of 4 shorts into a scalar short (SSE2+).
instruct rsand4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{
            "pshufd $tmp,$src2,0x1\n\t"
            "pand $tmp,$src2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp2,$tmp, 0x0\n\t"
            "andw $dst,$tmp2\n\t"
            "pextrw $tmp2,$tmp, 0x1\n\t"
            "andw $dst,$tmp2\n\t"
            "movswl $dst,$dst\t! and reduction4S" %}
  ins_encode %{
    // Fold the upper pair of shorts onto the lower pair, then combine the two
    // survivors with the scalar accumulator via pextrw.
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp2$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp2$$Register);
    // Sign-extend the short result into the 32-bit destination.
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 128-bit vector of 8 shorts into a scalar short (SSE2+).
instruct rsand8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  // NOTE(review): "pand $tmp,$tmp,$tmp2" below displays three operands although
  // the emitted pand is the two-operand SSE form (dst == first src) - display only.
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pand $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pand $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "andw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! 
and reduction8S" %}
  // SSE 8-short AND reduction, tail: two shuffle+AND folds leave two shorts,
  // which are combined with the scalar accumulator via pextrw.
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp3$$Register);
    // Sign-extend the short result into the 32-bit destination.
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 256-bit vector of 16 shorts into a scalar short (AVX).
// Fold 256->128 with vextracti128, then halve twice with shuffles before the
// scalar pextrw combine.
instruct rvand16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpand $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "andw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! and reduction16S" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 512-bit vector of 32 shorts into a scalar short (EVEX).
// Folds 512->256->128->64->32 bits with vector ANDs, then combines the last
// two shorts through a GPR with a 16-bit shift.
instruct rvand32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  // Fixed format text: the encode below shifts by 16 (0x10), but the string
  // previously displayed "shrl $tmp3,0x16" (i.e. 22) - display-only mismatch.
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpand $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "andw $dst,$tmp3\n\t"
            "shrl $tmp3,0x10\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! and reduction32S" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    // First fold is 256-bit wide: literal vector length 1, not vector_len (0).
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    // The surviving two shorts are combined through a GPR: AND the low one,
    // shift it out, AND the high one.
    __ movdl($tmp3$$Register, $tmp$$XMMRegister);
    __ andw($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 16);
    __ andw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 64-bit vector of 2 ints into a scalar int (SSE2+).
instruct rsand2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pand $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "pand $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! 
and reduction2I" %}
  // SSE 2-int AND reduction, tail: fold the two lanes, then AND in the scalar
  // accumulator inside the XMM register before moving the result out.
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pand($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 128-bit vector of 4 ints into a scalar int (SSE2+).
instruct rsand4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pand $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pand $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "pand $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! and reduction4I" %}
  ins_encode %{
    // Two shuffle+AND folds leave one int; AND in src1 while still in XMM.
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pand($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 256-bit vector of 8 ints into a scalar int (AVX).
instruct rvand8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpand $tmp,$tmp,$src2\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpand $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! and reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 512-bit vector of 16 ints into a scalar int (EVEX).
instruct rvand16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  // Fixed format text: the 128-bit fold below ANDs $tmp with $tmp3 (the folded
  // 256-bit half), not $src2 as the string previously claimed - display only.
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpand $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpand $tmp,$tmp,$tmp3\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpand $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! and reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    // 256-bit fold uses vector length 1; all remaining folds are 128-bit (0).
    __ vpand($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, 0);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, 0);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// AND reduction of a 128-bit vector of 2 longs into a scalar long (SSE2+, 64-bit only).
instruct rsand2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pand $tmp2,$src2\n\t"
            "movdq $tmp,$src1\n\t"
            "pand $tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! 
and reduction2L" %}
  // SSE 2-long AND reduction, tail: fold the two longs, AND in the scalar
  // accumulator inside the XMM register, then move the 64-bit result out.
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pand($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 256-bit vector of 4 longs into a scalar long (AVX).
instruct rvand4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpand $tmp2,$tmp,$src2\n\t"
            "vpshufd $tmp,$tmp2,0xE\n\t"
            "vpand $tmp2,$tmp2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpand $tmp2,$tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! and reduction4L" %}
  ins_encode %{
    // All folds use 128-bit vector length (literal 0).
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movq($tmp$$XMMRegister, $src1$$Register);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of a 512-bit vector of 8 longs into a scalar long (EVEX).
// Uses the quadword form vpandq for the EVEX-encoded folds.
instruct rvand8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpandq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpandq $tmp2,$tmp2,$tmp\n\t"
            "vpshufd $tmp,$tmp2,0xE\n\t"
            "vpandq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpandq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! and reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    // 256-bit fold uses vector length 1; the remaining folds are 128-bit (0).
    __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0);
    __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// OR reduction of a 64-bit vector of 8 bytes into a scalar byte.
// UseSSE > 3 because pextrb requires SSE4.1.
instruct rsor8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{
            "pshufd $tmp,$src2,0x1\n\t"
            "por $tmp,$src2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp2,$tmp, 0x0\n\t"
            "orl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x1\n\t"
            "orl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x2\n\t"
            "orl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x3\n\t"
            "orl $dst,$tmp2\n\t"
            "movsbl $dst,$dst\t! or reduction8B" %}
  ins_encode %{
    // Fold the upper 4 bytes onto the lower 4, then OR the survivors into the
    // scalar accumulator one byte at a time.
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ por($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ orl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ orl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2);
    __ orl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ orl($dst$$Register, $tmp2$$Register);
    // Sign-extend the byte result into the 32-bit destination.
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 128-bit vector of 16 bytes into a scalar byte (SSE4.1+).
instruct rsor16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  // NOTE(review): "por $tmp,$tmp,$tmp2" below displays three operands although
  // the emitted por is the two-operand SSE form - display only.
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "por $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "por $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "orl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "orl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "orl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "orl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! or reduction16B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ por($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ por($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ orl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ orl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ orl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3);
    __ orl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 256-bit vector of 32 bytes into a scalar byte (AVX).
instruct rvor32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpor $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "orl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "orl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "orl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "orl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! 
or reduction32B" %}
  // AVX 256-bit byte OR reduction, tail: fold 256->128->64->32 bits, then OR
  // the surviving four bytes into the scalar accumulator via pextrb.
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ orl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ orl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ orl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3);
    __ orl($dst$$Register, $tmp3$$Register);
    // Sign-extend the byte result into the 32-bit destination.
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 512-bit vector of 64 bytes into a scalar byte (EVEX).
// Folds 512->256->128->64->32 bits, then combines the last four bytes through
// a GPR using 8-bit shifts.
instruct rvor64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpor $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "orl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "orl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "orl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "orl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! or reduction64B" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    // 256-bit fold: literal vector length 1; remaining folds are 128-bit (0).
    __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    // Combine the surviving 4 bytes through a GPR, shifting each byte down.
    __ movdl($tmp3$$Register, $tmp$$XMMRegister);
    __ orl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ orl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ orl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ orl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 64-bit vector of 4 shorts into a scalar short (SSE2+).
instruct rsor4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{
            "pshufd $tmp,$src2,0x1\n\t"
            "por $tmp,$src2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp2,$tmp, 0x0\n\t"
            "orw $dst,$tmp2\n\t"
            "pextrw $tmp2,$tmp, 0x1\n\t"
            "orw $dst,$tmp2\n\t"
            "movswl $dst,$dst\t! or reduction4S" %}
  ins_encode %{
    // Fold the upper pair of shorts onto the lower pair, then OR the two
    // survivors into the scalar accumulator via pextrw.
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ por($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ orw($dst$$Register, $tmp2$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ orw($dst$$Register, $tmp2$$Register);
    // Sign-extend the short result into the 32-bit destination.
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 128-bit vector of 8 shorts into a scalar short (SSE2+).
instruct rsor8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  // NOTE(review): "por $tmp,$tmp,$tmp2" below displays three operands although
  // the emitted por is the two-operand SSE form - display only.
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "por $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "por $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "orw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "orw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! 
or reduction8S" %}
  // SSE 8-short OR reduction, tail: two shuffle+OR folds leave two shorts,
  // which are combined with the scalar accumulator via pextrw.
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ por($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ por($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ orw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ orw($dst$$Register, $tmp3$$Register);
    // Sign-extend the short result into the 32-bit destination.
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 256-bit vector of 16 shorts into a scalar short (AVX).
instruct rvor16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpor $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "orw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "orw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! or reduction16S" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ orw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ orw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 512-bit vector of 32 shorts into a scalar short (EVEX).
instruct rvor32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  // Fixed format text: the encode below shifts by 16 (0x10), but the string
  // previously displayed "shrl $tmp3,0x16" (i.e. 22) - display-only mismatch.
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpor $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "orw $dst,$tmp3\n\t"
            "shrl $tmp3,0x10\n\t"
            "orw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! or reduction32S" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    // 256-bit fold: literal vector length 1; remaining folds are 128-bit (0).
    __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    // Combine the surviving two shorts through a GPR with a 16-bit shift.
    __ movdl($tmp3$$Register, $tmp$$XMMRegister);
    __ orw($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 16);
    __ orw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 64-bit vector of 2 ints into a scalar int (SSE2+).
instruct rsor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "por $tmp2,$src2\n\t"
            "movd $tmp,$src1\n\t"
            "por $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! 
or reduction2I" %}
  // SSE 2-int OR reduction, tail: fold the two lanes, OR in the scalar
  // accumulator inside the XMM register, then move the result out.
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ por($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ por($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 128-bit vector of 4 ints into a scalar int (SSE2+).
instruct rsor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "por $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "por $tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "por $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! or reduction4I" %}
  ins_encode %{
    // Two shuffle+OR folds leave one int; OR in src1 while still in XMM.
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ por($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ por($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ por($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 256-bit vector of 8 ints into a scalar int (AVX).
instruct rvor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  // Fixed format text: the vpshufd lines previously ended with "\t" only,
  // missing the "\n" separator used by every other line - display only.
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpor $tmp,$tmp,$src2\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpor $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! or reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 512-bit vector of 16 ints into a scalar int (EVEX).
instruct rvor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  // Fixed format text: the 128-bit fold below ORs $tmp with $tmp3 (the folded
  // 256-bit half), not $src2 as previously displayed; also restored the
  // missing "\n" on the vpshufd lines - display only.
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpor $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpor $tmp,$tmp,$tmp3\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpor $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpor $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! or reduction16I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    // 256-bit fold: literal vector length 1; remaining folds are 128-bit (0).
    __ vpor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// OR reduction of a 128-bit vector of 2 longs into a scalar long (SSE2+).
// NOTE(review): unlike the AND counterpart (rsand2L), this one is not inside
// an #ifdef _LP64 guard although it uses rRegL/movdq - verify intended.
instruct rsor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "por $tmp2,$src2\n\t"
            "movdq $tmp,$src1\n\t"
            "por $tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! 
or reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ por($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ por($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// OR-reduce the 4 longs of a 256-bit vector into a scalar, folding in src1.
instruct rvor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpor $tmp2,$tmp,$src2\n\t"
            "vpshufd $tmp,$tmp2,0xE\t"
            "vpor $tmp2,$tmp2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpor $tmp2,$tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! or reduction4L" %}
  ins_encode %{
    int vector_len = 0;
    // Fold upper 128-bit lane into the lower one, then pairwise within the lane.
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len);
    __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movq($tmp$$XMMRegister, $src1$$Register);
    __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// OR-reduce the 8 longs of a 512-bit vector (EVEX) into a scalar, folding in src1.
instruct rvor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vporq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vporq $tmp2,$tmp2,$tmp\n\t"
            "vpshufd $tmp,$tmp2,0xE\t"
            "vporq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vporq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! or reduction8L" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    // 256-bit OR of the two 256-bit halves (hence vector length 1 here).
    __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len);
    __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// XOR-reduce the 8 bytes of a 64-bit vector into a scalar, folding in src1.
instruct rsxor8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{
            "pshufd $tmp,$src2,0x1\n\t"
            "pxor $tmp,$src2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp2,$tmp, 0x0\n\t"
            "xorl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x1\n\t"
            "xorl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x2\n\t"
            "xorl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x3\n\t"
            "xorl $dst,$tmp2\n\t"
            "movsbl $dst,$dst\t!
xor reduction8B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pxor($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ xorl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ xorl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2);
    __ xorl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ xorl($dst$$Register, $tmp2$$Register);
    // Sign-extend the accumulated byte back to a 32-bit int result.
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR-reduce the 16 bytes of a 128-bit vector into a scalar, folding in src1.
instruct rsxor16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pxor $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pxor $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "xorl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! xor reduction16B" %}
  ins_encode %{
    // Fold high 64 bits onto the low ones, then the remaining 32-bit pair.
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pxor($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pxor($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR-reduce the 32 bytes of a 256-bit vector (AVX) into a scalar, folding in src1.
instruct rvxor32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpxor $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "xorl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t!
xor reduction32B" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR-reduce the 64 bytes of a 512-bit vector (EVEX) into a scalar, folding in src1.
instruct rvxor64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpxor $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "xorl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "xorl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "xorl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "xorl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! xor reduction64B" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    // 256-bit XOR of the two halves of the 512-bit source (vector length 1).
    __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    // Fold the low dword byte-by-byte via shifts instead of pextrb.
    __ movdl($tmp3$$Register, $tmp$$XMMRegister);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ xorl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR-reduce the 4 shorts of a 64-bit vector into a scalar, folding in src1.
instruct rsxor4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{
            "pshufd $tmp,$src2,0x1\n\t"
            "pxor $tmp,$src2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp2,$tmp, 0x0\n\t"
            "xorw $dst,$tmp2\n\t"
            "pextrw $tmp2,$tmp, 0x1\n\t"
            "xorw $dst,$tmp2\n\t"
            "movswl $dst,$dst\t!
xor reduction4S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pxor($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ xorw($dst$$Register, $tmp2$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ xorw($dst$$Register, $tmp2$$Register);
    // Sign-extend the accumulated short back to a 32-bit int result.
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR-reduce the 8 shorts of a 128-bit vector into a scalar, folding in src1.
instruct rsxor8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pxor $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pxor $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "xorw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "xorw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! xor reduction8S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pxor($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pxor($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ xorw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ xorw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR-reduce the 16 shorts of a 256-bit vector (AVX) into a scalar, folding in src1.
instruct rvxor16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpxor $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "xorw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "xorw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t!
xor reduction16S" %} 11020 ins_encode %{ 11021 int vector_len = 0; 11022 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 11023 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 11024 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 11025 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11026 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 11027 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11028 __ movzwl($dst$$Register, $src1$$Register); 11029 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 11030 __ xorw($dst$$Register, $tmp3$$Register); 11031 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); 11032 __ xorw($dst$$Register, $tmp3$$Register); 11033 __ movswl($dst$$Register, $dst$$Register); 11034 %} 11035 ins_pipe( pipe_slow ); 11036 %} 11037 11038 instruct rvxor32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 11039 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 11040 match(Set dst (XorReductionV src1 src2)); 11041 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 11042 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 11043 "vpxor $tmp2,$tmp2,$src2\n\t" 11044 "vextracti128_high $tmp,$tmp2\n\t" 11045 "vpxor $tmp,$tmp,$tmp2\n\t" 11046 "pshufd $tmp2,$tmp,0xE\n\t" 11047 "vpxor $tmp,$tmp,$tmp2\n\t" 11048 "pshufd $tmp2,$tmp,0x1\n\t" 11049 "vpxor $tmp,$tmp,$tmp2\n\t" 11050 "movzwl $dst,$src1\n\t" 11051 "movdl $tmp3,$tmp\n\t" 11052 "xorw $dst,$tmp3\n\t" 11053 "shrl $tmp3,0x16\n\t" 11054 "xorw $dst,$tmp3\n\t" 11055 "movswl $dst,$dst\t! 
xor reduction32S" %} 11056 ins_encode %{ 11057 int vector_len = 0; 11058 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 11059 __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 11060 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 11061 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11062 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 11063 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11064 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 11065 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11066 __ movzwl($dst$$Register, $src1$$Register); 11067 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 11068 __ xorw($dst$$Register, $tmp3$$Register); 11069 __ shrl($tmp3$$Register, 16); 11070 __ xorw($dst$$Register, $tmp3$$Register); 11071 __ movswl($dst$$Register, $dst$$Register); 11072 %} 11073 ins_pipe( pipe_slow ); 11074 %} 11075 11076 instruct rsxor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 11077 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11078 match(Set dst (XorReductionV src1 src2)); 11079 effect(TEMP tmp, TEMP tmp2); 11080 format %{ "pshufd $tmp2,$src2,0x1\n\t" 11081 "pxor $tmp2,$src2\n\t" 11082 "movd $tmp,$src1\n\t" 11083 "pxor $tmp2,$tmp\n\t" 11084 "movd $dst,$tmp2\t! 
xor reduction2I" %} 11085 ins_encode %{ 11086 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 11087 __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); 11088 __ movdl($tmp$$XMMRegister, $src1$$Register); 11089 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 11090 __ movdl($dst$$Register, $tmp2$$XMMRegister); 11091 %} 11092 ins_pipe( pipe_slow ); 11093 %} 11094 11095 instruct rsxor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 11096 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11097 match(Set dst (XorReductionV src1 src2)); 11098 effect(TEMP tmp, TEMP tmp2); 11099 format %{ "pshufd $tmp2,$src2,0xE\n\t" 11100 "pxor $tmp2,$src2\n\t" 11101 "pshufd $tmp,$tmp2,0x1\n\t" 11102 "pxor $tmp2,$tmp\n\t" 11103 "movd $tmp,$src1\n\t" 11104 "pxor $tmp2,$tmp\n\t" 11105 "movd $dst,$tmp2\t! xor reduction4I" %} 11106 ins_encode %{ 11107 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 11108 __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); 11109 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 11110 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 11111 __ movdl($tmp$$XMMRegister, $src1$$Register); 11112 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 11113 __ movdl($dst$$Register, $tmp2$$XMMRegister); 11114 %} 11115 ins_pipe( pipe_slow ); 11116 %} 11117 11118 instruct rvxor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 11119 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11120 match(Set dst (XorReductionV src1 src2)); 11121 effect(TEMP tmp, TEMP tmp2); 11122 format %{ "vextracti128_high $tmp,$src2\n\t" 11123 "vpxor $tmp,$tmp,$src2\n\t" 11124 "vpshufd $tmp2,$tmp,0xE\t" 11125 "vpxor $tmp,$tmp,$tmp2\n\t" 11126 "vpshufd $tmp2,$tmp,0x1\t" 11127 "vpxor $tmp,$tmp,$tmp2\n\t" 11128 "movd $tmp2,$src1\n\t" 11129 "vpxor $tmp2,$tmp,$tmp2\n\t" 11130 "movd $dst,$tmp2\t! 
xor reduction8I" %} 11131 ins_encode %{ 11132 int vector_len = 0; 11133 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 11134 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 11135 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 11136 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11137 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 11138 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11139 __ movdl($tmp2$$XMMRegister, $src1$$Register); 11140 __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11141 __ movdl($dst$$Register, $tmp2$$XMMRegister); 11142 %} 11143 ins_pipe( pipe_slow ); 11144 %} 11145 11146 instruct rvxor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 11147 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11148 match(Set dst (XorReductionV src1 src2)); 11149 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 11150 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 11151 "vpxor $tmp3,$tmp3,$src2\n\t" 11152 "vextracti128_high $tmp,$tmp3\n\t" 11153 "vpxor $tmp,$tmp,$src2\n\t" 11154 "vpshufd $tmp2,$tmp,0xE\t" 11155 "vpxor $tmp,$tmp,$tmp2\n\t" 11156 "vpshufd $tmp2,$tmp,0x1\t" 11157 "vpxor $tmp,$tmp,$tmp2\n\t" 11158 "movd $tmp2,$src1\n\t" 11159 "vpxor $tmp2,$tmp,$tmp2\n\t" 11160 "movd $dst,$tmp2\t! 
xor reduction16I" %} 11161 ins_encode %{ 11162 int vector_len = 0; 11163 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 11164 __ vpxor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 11165 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 11166 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); 11167 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 11168 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11169 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 11170 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11171 __ movdl($tmp2$$XMMRegister, $src1$$Register); 11172 __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11173 __ movdl($dst$$Register, $tmp2$$XMMRegister); 11174 %} 11175 ins_pipe( pipe_slow ); 11176 %} 11177 11178 instruct rsxor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 11179 predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11180 match(Set dst (XorReductionV src1 src2)); 11181 effect(TEMP tmp, TEMP tmp2); 11182 format %{ "pshufd $tmp2,$src2,0xE\n\t" 11183 "pxor $tmp2,$src2\n\t" 11184 "movdq $tmp,$src1\n\t" 11185 "pxor $tmp2,$tmp\n\t" 11186 "movq $dst,$tmp2\t! 
xor reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XOR-reduce the 4 longs of a 256-bit vector (AVX) into a scalar, folding in src1.
instruct rvxor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpxor $tmp2,$tmp,$src2\n\t"
            "vpshufd $tmp,$tmp2,0xE\t"
            "vpxor $tmp2,$tmp2,$tmp\n\t"
            "movq $tmp,$src1\n\t"
            "vpxor $tmp2,$tmp2,$tmp\n\t"
            "movq $dst,$tmp2\t! xor reduction4L" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len);
    __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movq($tmp$$XMMRegister, $src1$$Register);
    __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// XOR-reduce the 8 longs of a 512-bit vector (EVEX) into a scalar, folding in src1.
instruct rvxor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpxorq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpxorq $tmp2,$tmp2,$tmp\n\t"
            "vpshufd $tmp,$tmp2,0xE\t"
            "vpxorq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpxorq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! xor reduction8L" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    // 256-bit XOR of the two halves of the 512-bit source (vector length 1).
    __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len);
    __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
// Add packed 4 bytes, SSE form (dst += src).
instruct vadd4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed4B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 4 bytes, three-operand AVX form.
instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t!
add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 4 bytes, EVEX (AVX-512 BW) form.
instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 4 bytes on AVX-512 without BW: two-operand match, src1 pinned to dst.
instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 4 bytes with a memory operand, AVX form.
instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 4 bytes with a memory operand, EVEX (AVX-512 BW) form.
instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 4 bytes with a memory operand on AVX-512 without BW.
instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 8 bytes, SSE form (dst += src).
instruct vadd8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed8B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 8 bytes, three-operand AVX form.
instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 8 bytes, EVEX (AVX-512 BW) form.
instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t!
add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 8 bytes on AVX-512 without BW: two-operand match, src1 pinned to dst.
instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 8 bytes with a memory operand, AVX form.
instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 8 bytes with a memory operand, EVEX (AVX-512 BW) form.
instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 8 bytes with a memory operand on AVX-512 without BW.
instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 16 bytes, SSE form (dst += src).
instruct vadd16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packed16B" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 16 bytes, three-operand AVX form.
instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 16 bytes, EVEX (AVX-512 BW) form.
instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 16 bytes on AVX-512 without BW: two-operand match, src1 pinned to dst.
instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t!
add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 16 bytes with a memory operand, AVX form.
instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 16 bytes with a memory operand, EVEX (AVX-512 BW) form.
instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 16 bytes with a memory operand on AVX-512 without BW.
instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 32 bytes, 256-bit AVX form (vector length 1).
instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 32 bytes, EVEX (AVX-512 BW) form.
instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 32 bytes on AVX-512 without BW: two-operand match, src1 pinned to dst.
instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 32 bytes with a memory operand, 256-bit AVX form.
instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Add packed 32 bytes with a memory operand, EVEX (AVX-512 BW) form.
instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t!
add packed32B" %} 11539 ins_encode %{ 11540 int vector_len = 1; 11541 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11542 %} 11543 ins_pipe( pipe_slow ); 11544 %} 11545 11546 instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 11547 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 11548 match(Set dst (AddVB dst (LoadVector mem))); 11549 effect(TEMP src); 11550 format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %} 11551 ins_encode %{ 11552 int vector_len = 1; 11553 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11554 %} 11555 ins_pipe( pipe_slow ); 11556 %} 11557 11558 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 11559 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 11560 match(Set dst (AddVB src1 src2)); 11561 format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %} 11562 ins_encode %{ 11563 int vector_len = 2; 11564 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11565 %} 11566 ins_pipe( pipe_slow ); 11567 %} 11568 11569 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{ 11570 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 11571 match(Set dst (AddVB src (LoadVector mem))); 11572 format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %} 11573 ins_encode %{ 11574 int vector_len = 2; 11575 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11576 %} 11577 ins_pipe( pipe_slow ); 11578 %} 11579 11580 // Shorts/Chars vector add 11581 instruct vadd2S(vecS dst, vecS src) %{ 11582 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 11583 match(Set dst (AddVS dst src)); 11584 format %{ "paddw $dst,$src\t! 
add packed2S" %} 11585 ins_encode %{ 11586 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 11587 %} 11588 ins_pipe( pipe_slow ); 11589 %} 11590 11591 instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 11592 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 11593 match(Set dst (AddVS src1 src2)); 11594 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 11595 ins_encode %{ 11596 int vector_len = 0; 11597 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11598 %} 11599 ins_pipe( pipe_slow ); 11600 %} 11601 11602 instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 11603 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 11604 match(Set dst (AddVS src1 src2)); 11605 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 11606 ins_encode %{ 11607 int vector_len = 0; 11608 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11609 %} 11610 ins_pipe( pipe_slow ); 11611 %} 11612 11613 instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 11614 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 11615 match(Set dst (AddVS dst src2)); 11616 effect(TEMP src1); 11617 format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %} 11618 ins_encode %{ 11619 int vector_len = 0; 11620 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11621 %} 11622 ins_pipe( pipe_slow ); 11623 %} 11624 11625 instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{ 11626 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 11627 match(Set dst (AddVS src (LoadVector mem))); 11628 format %{ "vpaddw $dst,$src,$mem\t! 
add packed2S" %} 11629 ins_encode %{ 11630 int vector_len = 0; 11631 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11632 %} 11633 ins_pipe( pipe_slow ); 11634 %} 11635 11636 instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{ 11637 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 11638 match(Set dst (AddVS src (LoadVector mem))); 11639 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 11640 ins_encode %{ 11641 int vector_len = 0; 11642 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11643 %} 11644 ins_pipe( pipe_slow ); 11645 %} 11646 11647 instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 11648 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 11649 match(Set dst (AddVS dst (LoadVector mem))); 11650 effect(TEMP src); 11651 format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} 11652 ins_encode %{ 11653 int vector_len = 0; 11654 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11655 %} 11656 ins_pipe( pipe_slow ); 11657 %} 11658 11659 instruct vadd4S(vecD dst, vecD src) %{ 11660 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 11661 match(Set dst (AddVS dst src)); 11662 format %{ "paddw $dst,$src\t! add packed4S" %} 11663 ins_encode %{ 11664 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 11665 %} 11666 ins_pipe( pipe_slow ); 11667 %} 11668 11669 instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 11670 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 11671 match(Set dst (AddVS src1 src2)); 11672 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed4S" %} 11673 ins_encode %{ 11674 int vector_len = 0; 11675 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11676 %} 11677 ins_pipe( pipe_slow ); 11678 %} 11679 11680 instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 11681 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 11682 match(Set dst (AddVS src1 src2)); 11683 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 11684 ins_encode %{ 11685 int vector_len = 0; 11686 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11687 %} 11688 ins_pipe( pipe_slow ); 11689 %} 11690 11691 instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 11692 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 11693 match(Set dst (AddVS dst src2)); 11694 effect(TEMP src1); 11695 format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %} 11696 ins_encode %{ 11697 int vector_len = 0; 11698 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11699 %} 11700 ins_pipe( pipe_slow ); 11701 %} 11702 11703 instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{ 11704 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 11705 match(Set dst (AddVS src (LoadVector mem))); 11706 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 11707 ins_encode %{ 11708 int vector_len = 0; 11709 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11710 %} 11711 ins_pipe( pipe_slow ); 11712 %} 11713 11714 instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{ 11715 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 11716 match(Set dst (AddVS src (LoadVector mem))); 11717 format %{ "vpaddw $dst,$src,$mem\t! 
add packed4S" %} 11718 ins_encode %{ 11719 int vector_len = 0; 11720 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11721 %} 11722 ins_pipe( pipe_slow ); 11723 %} 11724 11725 instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 11726 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 11727 match(Set dst (AddVS dst (LoadVector mem))); 11728 effect(TEMP src); 11729 format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} 11730 ins_encode %{ 11731 int vector_len = 0; 11732 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11733 %} 11734 ins_pipe( pipe_slow ); 11735 %} 11736 11737 instruct vadd8S(vecX dst, vecX src) %{ 11738 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 11739 match(Set dst (AddVS dst src)); 11740 format %{ "paddw $dst,$src\t! add packed8S" %} 11741 ins_encode %{ 11742 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 11743 %} 11744 ins_pipe( pipe_slow ); 11745 %} 11746 11747 instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11748 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 11749 match(Set dst (AddVS src1 src2)); 11750 format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %} 11751 ins_encode %{ 11752 int vector_len = 0; 11753 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11754 %} 11755 ins_pipe( pipe_slow ); 11756 %} 11757 11758 instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11759 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 11760 match(Set dst (AddVS src1 src2)); 11761 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %} 11762 ins_encode %{ 11763 int vector_len = 0; 11764 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11765 %} 11766 ins_pipe( pipe_slow ); 11767 %} 11768 11769 instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 11770 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 11771 match(Set dst (AddVS dst src2)); 11772 effect(TEMP src1); 11773 format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %} 11774 ins_encode %{ 11775 int vector_len = 0; 11776 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11777 %} 11778 ins_pipe( pipe_slow ); 11779 %} 11780 11781 instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{ 11782 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 11783 match(Set dst (AddVS src (LoadVector mem))); 11784 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 11785 ins_encode %{ 11786 int vector_len = 0; 11787 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11788 %} 11789 ins_pipe( pipe_slow ); 11790 %} 11791 11792 instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{ 11793 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 11794 match(Set dst (AddVS src (LoadVector mem))); 11795 format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %} 11796 ins_encode %{ 11797 int vector_len = 0; 11798 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11799 %} 11800 ins_pipe( pipe_slow ); 11801 %} 11802 11803 instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 11804 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 11805 match(Set dst (AddVS dst (LoadVector mem))); 11806 effect(TEMP src); 11807 format %{ "vpaddw $dst,$src,$mem\t! 
add packed8S" %} 11808 ins_encode %{ 11809 int vector_len = 0; 11810 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11811 %} 11812 ins_pipe( pipe_slow ); 11813 %} 11814 11815 instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11816 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 11817 match(Set dst (AddVS src1 src2)); 11818 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 11819 ins_encode %{ 11820 int vector_len = 1; 11821 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11822 %} 11823 ins_pipe( pipe_slow ); 11824 %} 11825 11826 instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 11827 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 11828 match(Set dst (AddVS src1 src2)); 11829 format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %} 11830 ins_encode %{ 11831 int vector_len = 1; 11832 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11833 %} 11834 ins_pipe( pipe_slow ); 11835 %} 11836 11837 instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 11838 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 11839 match(Set dst (AddVS dst src2)); 11840 effect(TEMP src1); 11841 format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %} 11842 ins_encode %{ 11843 int vector_len = 1; 11844 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11845 %} 11846 ins_pipe( pipe_slow ); 11847 %} 11848 11849 instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{ 11850 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 11851 match(Set dst (AddVS src (LoadVector mem))); 11852 format %{ "vpaddw $dst,$src,$mem\t! 
add packed16S" %} 11853 ins_encode %{ 11854 int vector_len = 1; 11855 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11856 %} 11857 ins_pipe( pipe_slow ); 11858 %} 11859 11860 instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{ 11861 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 11862 match(Set dst (AddVS src (LoadVector mem))); 11863 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 11864 ins_encode %{ 11865 int vector_len = 1; 11866 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11867 %} 11868 ins_pipe( pipe_slow ); 11869 %} 11870 11871 instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 11872 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 11873 match(Set dst (AddVS dst (LoadVector mem))); 11874 effect(TEMP src); 11875 format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %} 11876 ins_encode %{ 11877 int vector_len = 1; 11878 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11879 %} 11880 ins_pipe( pipe_slow ); 11881 %} 11882 11883 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 11884 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 11885 match(Set dst (AddVS src1 src2)); 11886 format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %} 11887 ins_encode %{ 11888 int vector_len = 2; 11889 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11890 %} 11891 ins_pipe( pipe_slow ); 11892 %} 11893 11894 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{ 11895 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 11896 match(Set dst (AddVS src (LoadVector mem))); 11897 format %{ "vpaddw $dst,$src,$mem\t! 
add packed32S" %} 11898 ins_encode %{ 11899 int vector_len = 2; 11900 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11901 %} 11902 ins_pipe( pipe_slow ); 11903 %} 11904 11905 // Integers vector add 11906 instruct vadd2I(vecD dst, vecD src) %{ 11907 predicate(n->as_Vector()->length() == 2); 11908 match(Set dst (AddVI dst src)); 11909 format %{ "paddd $dst,$src\t! add packed2I" %} 11910 ins_encode %{ 11911 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 11912 %} 11913 ins_pipe( pipe_slow ); 11914 %} 11915 11916 instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ 11917 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11918 match(Set dst (AddVI src1 src2)); 11919 format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %} 11920 ins_encode %{ 11921 int vector_len = 0; 11922 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11923 %} 11924 ins_pipe( pipe_slow ); 11925 %} 11926 11927 instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ 11928 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 11929 match(Set dst (AddVI src (LoadVector mem))); 11930 format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} 11931 ins_encode %{ 11932 int vector_len = 0; 11933 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11934 %} 11935 ins_pipe( pipe_slow ); 11936 %} 11937 11938 instruct vadd4I(vecX dst, vecX src) %{ 11939 predicate(n->as_Vector()->length() == 4); 11940 match(Set dst (AddVI dst src)); 11941 format %{ "paddd $dst,$src\t! add packed4I" %} 11942 ins_encode %{ 11943 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 11944 %} 11945 ins_pipe( pipe_slow ); 11946 %} 11947 11948 instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ 11949 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11950 match(Set dst (AddVI src1 src2)); 11951 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed4I" %} 11952 ins_encode %{ 11953 int vector_len = 0; 11954 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11955 %} 11956 ins_pipe( pipe_slow ); 11957 %} 11958 11959 instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{ 11960 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 11961 match(Set dst (AddVI src (LoadVector mem))); 11962 format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %} 11963 ins_encode %{ 11964 int vector_len = 0; 11965 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11966 %} 11967 ins_pipe( pipe_slow ); 11968 %} 11969 11970 instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{ 11971 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 11972 match(Set dst (AddVI src1 src2)); 11973 format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %} 11974 ins_encode %{ 11975 int vector_len = 1; 11976 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11977 %} 11978 ins_pipe( pipe_slow ); 11979 %} 11980 11981 instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{ 11982 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 11983 match(Set dst (AddVI src (LoadVector mem))); 11984 format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %} 11985 ins_encode %{ 11986 int vector_len = 1; 11987 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11988 %} 11989 ins_pipe( pipe_slow ); 11990 %} 11991 11992 instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 11993 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 11994 match(Set dst (AddVI src1 src2)); 11995 format %{ "vpaddd $dst,$src1,$src2\t! 
add packed16I" %} 11996 ins_encode %{ 11997 int vector_len = 2; 11998 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11999 %} 12000 ins_pipe( pipe_slow ); 12001 %} 12002 12003 instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{ 12004 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 12005 match(Set dst (AddVI src (LoadVector mem))); 12006 format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %} 12007 ins_encode %{ 12008 int vector_len = 2; 12009 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12010 %} 12011 ins_pipe( pipe_slow ); 12012 %} 12013 12014 // Longs vector add 12015 instruct vadd2L(vecX dst, vecX src) %{ 12016 predicate(n->as_Vector()->length() == 2); 12017 match(Set dst (AddVL dst src)); 12018 format %{ "paddq $dst,$src\t! add packed2L" %} 12019 ins_encode %{ 12020 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 12021 %} 12022 ins_pipe( pipe_slow ); 12023 %} 12024 12025 instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ 12026 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12027 match(Set dst (AddVL src1 src2)); 12028 format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %} 12029 ins_encode %{ 12030 int vector_len = 0; 12031 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12032 %} 12033 ins_pipe( pipe_slow ); 12034 %} 12035 12036 instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{ 12037 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12038 match(Set dst (AddVL src (LoadVector mem))); 12039 format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %} 12040 ins_encode %{ 12041 int vector_len = 0; 12042 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12043 %} 12044 ins_pipe( pipe_slow ); 12045 %} 12046 12047 instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{ 12048 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 12049 match(Set dst (AddVL src1 src2)); 12050 format %{ "vpaddq $dst,$src1,$src2\t! 
add packed4L" %} 12051 ins_encode %{ 12052 int vector_len = 1; 12053 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12054 %} 12055 ins_pipe( pipe_slow ); 12056 %} 12057 12058 instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{ 12059 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 12060 match(Set dst (AddVL src (LoadVector mem))); 12061 format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %} 12062 ins_encode %{ 12063 int vector_len = 1; 12064 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12065 %} 12066 ins_pipe( pipe_slow ); 12067 %} 12068 12069 instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 12070 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 12071 match(Set dst (AddVL src1 src2)); 12072 format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %} 12073 ins_encode %{ 12074 int vector_len = 2; 12075 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12076 %} 12077 ins_pipe( pipe_slow ); 12078 %} 12079 12080 instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{ 12081 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 12082 match(Set dst (AddVL src (LoadVector mem))); 12083 format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %} 12084 ins_encode %{ 12085 int vector_len = 2; 12086 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12087 %} 12088 ins_pipe( pipe_slow ); 12089 %} 12090 12091 // Floats vector add 12092 instruct vadd2F(vecD dst, vecD src) %{ 12093 predicate(n->as_Vector()->length() == 2); 12094 match(Set dst (AddVF dst src)); 12095 format %{ "addps $dst,$src\t! add packed2F" %} 12096 ins_encode %{ 12097 __ addps($dst$$XMMRegister, $src$$XMMRegister); 12098 %} 12099 ins_pipe( pipe_slow ); 12100 %} 12101 12102 instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{ 12103 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12104 match(Set dst (AddVF src1 src2)); 12105 format %{ "vaddps $dst,$src1,$src2\t! 
add packed2F" %} 12106 ins_encode %{ 12107 int vector_len = 0; 12108 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12109 %} 12110 ins_pipe( pipe_slow ); 12111 %} 12112 12113 instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ 12114 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12115 match(Set dst (AddVF src (LoadVector mem))); 12116 format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} 12117 ins_encode %{ 12118 int vector_len = 0; 12119 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12120 %} 12121 ins_pipe( pipe_slow ); 12122 %} 12123 12124 instruct vadd4F(vecX dst, vecX src) %{ 12125 predicate(n->as_Vector()->length() == 4); 12126 match(Set dst (AddVF dst src)); 12127 format %{ "addps $dst,$src\t! add packed4F" %} 12128 ins_encode %{ 12129 __ addps($dst$$XMMRegister, $src$$XMMRegister); 12130 %} 12131 ins_pipe( pipe_slow ); 12132 %} 12133 12134 instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{ 12135 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 12136 match(Set dst (AddVF src1 src2)); 12137 format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %} 12138 ins_encode %{ 12139 int vector_len = 0; 12140 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12141 %} 12142 ins_pipe( pipe_slow ); 12143 %} 12144 12145 instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{ 12146 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 12147 match(Set dst (AddVF src (LoadVector mem))); 12148 format %{ "vaddps $dst,$src,$mem\t! add packed4F" %} 12149 ins_encode %{ 12150 int vector_len = 0; 12151 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12152 %} 12153 ins_pipe( pipe_slow ); 12154 %} 12155 12156 instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{ 12157 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 12158 match(Set dst (AddVF src1 src2)); 12159 format %{ "vaddps $dst,$src1,$src2\t! 
add packed8F" %} 12160 ins_encode %{ 12161 int vector_len = 1; 12162 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12163 %} 12164 ins_pipe( pipe_slow ); 12165 %} 12166 12167 instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{ 12168 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 12169 match(Set dst (AddVF src (LoadVector mem))); 12170 format %{ "vaddps $dst,$src,$mem\t! add packed8F" %} 12171 ins_encode %{ 12172 int vector_len = 1; 12173 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12174 %} 12175 ins_pipe( pipe_slow ); 12176 %} 12177 12178 instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 12179 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 12180 match(Set dst (AddVF src1 src2)); 12181 format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %} 12182 ins_encode %{ 12183 int vector_len = 2; 12184 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12185 %} 12186 ins_pipe( pipe_slow ); 12187 %} 12188 12189 instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{ 12190 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 12191 match(Set dst (AddVF src (LoadVector mem))); 12192 format %{ "vaddps $dst,$src,$mem\t! add packed16F" %} 12193 ins_encode %{ 12194 int vector_len = 2; 12195 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12196 %} 12197 ins_pipe( pipe_slow ); 12198 %} 12199 12200 // Doubles vector add 12201 instruct vadd2D(vecX dst, vecX src) %{ 12202 predicate(n->as_Vector()->length() == 2); 12203 match(Set dst (AddVD dst src)); 12204 format %{ "addpd $dst,$src\t! add packed2D" %} 12205 ins_encode %{ 12206 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 12207 %} 12208 ins_pipe( pipe_slow ); 12209 %} 12210 12211 instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{ 12212 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12213 match(Set dst (AddVD src1 src2)); 12214 format %{ "vaddpd $dst,$src1,$src2\t! 
add packed2D" %} 12215 ins_encode %{ 12216 int vector_len = 0; 12217 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12218 %} 12219 ins_pipe( pipe_slow ); 12220 %} 12221 12222 instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{ 12223 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12224 match(Set dst (AddVD src (LoadVector mem))); 12225 format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %} 12226 ins_encode %{ 12227 int vector_len = 0; 12228 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12229 %} 12230 ins_pipe( pipe_slow ); 12231 %} 12232 12233 instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{ 12234 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 12235 match(Set dst (AddVD src1 src2)); 12236 format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %} 12237 ins_encode %{ 12238 int vector_len = 1; 12239 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12240 %} 12241 ins_pipe( pipe_slow ); 12242 %} 12243 12244 instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{ 12245 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 12246 match(Set dst (AddVD src (LoadVector mem))); 12247 format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %} 12248 ins_encode %{ 12249 int vector_len = 1; 12250 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12251 %} 12252 ins_pipe( pipe_slow ); 12253 %} 12254 12255 instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 12256 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 12257 match(Set dst (AddVD src1 src2)); 12258 format %{ "vaddpd $dst,$src1,$src2\t! 
add packed8D" %} 12259 ins_encode %{ 12260 int vector_len = 2; 12261 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12262 %} 12263 ins_pipe( pipe_slow ); 12264 %} 12265 12266 instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{ 12267 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 12268 match(Set dst (AddVD src (LoadVector mem))); 12269 format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %} 12270 ins_encode %{ 12271 int vector_len = 2; 12272 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12273 %} 12274 ins_pipe( pipe_slow ); 12275 %} 12276 12277 // --------------------------------- SUB -------------------------------------- 12278 12279 // Bytes vector sub 12280 instruct vsub4B(vecS dst, vecS src) %{ 12281 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 12282 match(Set dst (SubVB dst src)); 12283 format %{ "psubb $dst,$src\t! sub packed4B" %} 12284 ins_encode %{ 12285 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 12286 %} 12287 ins_pipe( pipe_slow ); 12288 %} 12289 12290 instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 12291 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 12292 match(Set dst (SubVB src1 src2)); 12293 format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %} 12294 ins_encode %{ 12295 int vector_len = 0; 12296 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12297 %} 12298 ins_pipe( pipe_slow ); 12299 %} 12300 12301 instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 12302 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 12303 match(Set dst (SubVB src1 src2)); 12304 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4-byte vector subtract for AVX-512 parts without the BW extension
// (supports_avx512nobw, e.g. KNL): matches the two-address form dst = dst - src2.
// Renamed from the typo "vsub4B_reg_exex_special" to "_evex_special" for
// consistency with every sibling *_evex_special rule in this file.
// NOTE(review): the encoding reads TEMP $src1 as the first source while the
// match pattern consumes $dst — this mirrors the other *_special rules here;
// confirm ADLC register binding makes src1 alias the intended input.
instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4-byte vector subtract, register-memory form, AVX1/2-only parts.
instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4-byte vector subtract, register-memory form, AVX-512 with BW.
instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4-byte vector subtract, memory form, AVX-512 without BW (dst = dst - mem).
instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! 
sub packed4B" %} 12351 ins_encode %{ 12352 int vector_len = 0; 12353 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12354 %} 12355 ins_pipe( pipe_slow ); 12356 %} 12357 12358 instruct vsub8B(vecD dst, vecD src) %{ 12359 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 12360 match(Set dst (SubVB dst src)); 12361 format %{ "psubb $dst,$src\t! sub packed8B" %} 12362 ins_encode %{ 12363 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 12364 %} 12365 ins_pipe( pipe_slow ); 12366 %} 12367 12368 instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 12369 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 12370 match(Set dst (SubVB src1 src2)); 12371 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 12372 ins_encode %{ 12373 int vector_len = 0; 12374 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12375 %} 12376 ins_pipe( pipe_slow ); 12377 %} 12378 12379 instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 12380 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 12381 match(Set dst (SubVB src1 src2)); 12382 format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %} 12383 ins_encode %{ 12384 int vector_len = 0; 12385 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12386 %} 12387 ins_pipe( pipe_slow ); 12388 %} 12389 12390 instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 12391 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 12392 match(Set dst (SubVB dst src2)); 12393 effect(TEMP src1); 12394 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed8B" %} 12395 ins_encode %{ 12396 int vector_len = 0; 12397 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12398 %} 12399 ins_pipe( pipe_slow ); 12400 %} 12401 12402 instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{ 12403 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 12404 match(Set dst (SubVB src (LoadVector mem))); 12405 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 12406 ins_encode %{ 12407 int vector_len = 0; 12408 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12409 %} 12410 ins_pipe( pipe_slow ); 12411 %} 12412 12413 instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{ 12414 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 12415 match(Set dst (SubVB src (LoadVector mem))); 12416 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 12417 ins_encode %{ 12418 int vector_len = 0; 12419 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12420 %} 12421 ins_pipe( pipe_slow ); 12422 %} 12423 12424 instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 12425 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 12426 match(Set dst (SubVB dst (LoadVector mem))); 12427 effect(TEMP src); 12428 format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} 12429 ins_encode %{ 12430 int vector_len = 0; 12431 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12432 %} 12433 ins_pipe( pipe_slow ); 12434 %} 12435 12436 instruct vsub16B(vecX dst, vecX src) %{ 12437 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 12438 match(Set dst (SubVB dst src)); 12439 format %{ "psubb $dst,$src\t! 
sub packed16B" %} 12440 ins_encode %{ 12441 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 12442 %} 12443 ins_pipe( pipe_slow ); 12444 %} 12445 12446 instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 12447 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 12448 match(Set dst (SubVB src1 src2)); 12449 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 12450 ins_encode %{ 12451 int vector_len = 0; 12452 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12453 %} 12454 ins_pipe( pipe_slow ); 12455 %} 12456 12457 instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 12458 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 12459 match(Set dst (SubVB src1 src2)); 12460 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 12461 ins_encode %{ 12462 int vector_len = 0; 12463 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12464 %} 12465 ins_pipe( pipe_slow ); 12466 %} 12467 12468 instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 12469 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 12470 match(Set dst (SubVB dst src2)); 12471 effect(TEMP src1); 12472 format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %} 12473 ins_encode %{ 12474 int vector_len = 0; 12475 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12476 %} 12477 ins_pipe( pipe_slow ); 12478 %} 12479 12480 instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{ 12481 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 12482 match(Set dst (SubVB src (LoadVector mem))); 12483 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed16B" %} 12484 ins_encode %{ 12485 int vector_len = 0; 12486 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12487 %} 12488 ins_pipe( pipe_slow ); 12489 %} 12490 12491 instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{ 12492 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 12493 match(Set dst (SubVB src (LoadVector mem))); 12494 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 12495 ins_encode %{ 12496 int vector_len = 0; 12497 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12498 %} 12499 ins_pipe( pipe_slow ); 12500 %} 12501 12502 instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 12503 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 12504 match(Set dst (SubVB dst (LoadVector mem))); 12505 effect(TEMP src); 12506 format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %} 12507 ins_encode %{ 12508 int vector_len = 0; 12509 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12510 %} 12511 ins_pipe( pipe_slow ); 12512 %} 12513 12514 instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 12515 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 12516 match(Set dst (SubVB src1 src2)); 12517 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 12518 ins_encode %{ 12519 int vector_len = 1; 12520 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12521 %} 12522 ins_pipe( pipe_slow ); 12523 %} 12524 12525 instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{ 12526 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 12527 match(Set dst (SubVB src1 src2)); 12528 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packed32B" %} 12529 ins_encode %{ 12530 int vector_len = 1; 12531 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12532 %} 12533 ins_pipe( pipe_slow ); 12534 %} 12535 12536 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 12537 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 12538 match(Set dst (SubVB dst src2)); 12539 effect(TEMP src1); 12540 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 12541 ins_encode %{ 12542 int vector_len = 1; 12543 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12544 %} 12545 ins_pipe( pipe_slow ); 12546 %} 12547 12548 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ 12549 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 12550 match(Set dst (SubVB src (LoadVector mem))); 12551 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 12552 ins_encode %{ 12553 int vector_len = 1; 12554 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12555 %} 12556 ins_pipe( pipe_slow ); 12557 %} 12558 12559 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ 12560 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 12561 match(Set dst (SubVB src (LoadVector mem))); 12562 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 12563 ins_encode %{ 12564 int vector_len = 1; 12565 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12566 %} 12567 ins_pipe( pipe_slow ); 12568 %} 12569 12570 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 12571 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 12572 match(Set dst (SubVB dst (LoadVector mem))); 12573 effect(TEMP src); 12574 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed32B" %} 12575 ins_encode %{ 12576 int vector_len = 1; 12577 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12578 %} 12579 ins_pipe( pipe_slow ); 12580 %} 12581 12582 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 12583 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 12584 match(Set dst (SubVB src1 src2)); 12585 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 12586 ins_encode %{ 12587 int vector_len = 2; 12588 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12589 %} 12590 ins_pipe( pipe_slow ); 12591 %} 12592 12593 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 12594 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 12595 match(Set dst (SubVB src (LoadVector mem))); 12596 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 12597 ins_encode %{ 12598 int vector_len = 2; 12599 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12600 %} 12601 ins_pipe( pipe_slow ); 12602 %} 12603 12604 // Shorts/Chars vector sub 12605 instruct vsub2S(vecS dst, vecS src) %{ 12606 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 12607 match(Set dst (SubVS dst src)); 12608 format %{ "psubw $dst,$src\t! sub packed2S" %} 12609 ins_encode %{ 12610 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 12611 %} 12612 ins_pipe( pipe_slow ); 12613 %} 12614 12615 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 12616 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 12617 match(Set dst (SubVS src1 src2)); 12618 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed2S" %} 12619 ins_encode %{ 12620 int vector_len = 0; 12621 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12622 %} 12623 ins_pipe( pipe_slow ); 12624 %} 12625 12626 instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 12627 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 12628 match(Set dst (SubVS src1 src2)); 12629 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 12630 ins_encode %{ 12631 int vector_len = 0; 12632 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12633 %} 12634 ins_pipe( pipe_slow ); 12635 %} 12636 12637 instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 12638 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 12639 match(Set dst (SubVS dst src2)); 12640 effect(TEMP src1); 12641 format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %} 12642 ins_encode %{ 12643 int vector_len = 0; 12644 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12645 %} 12646 ins_pipe( pipe_slow ); 12647 %} 12648 12649 instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{ 12650 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 12651 match(Set dst (SubVS src (LoadVector mem))); 12652 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 12653 ins_encode %{ 12654 int vector_len = 0; 12655 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12656 %} 12657 ins_pipe( pipe_slow ); 12658 %} 12659 12660 instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{ 12661 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 12662 match(Set dst (SubVS src (LoadVector mem))); 12663 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed2S" %} 12664 ins_encode %{ 12665 int vector_len = 0; 12666 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12667 %} 12668 ins_pipe( pipe_slow ); 12669 %} 12670 12671 instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 12672 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 12673 match(Set dst (SubVS dst (LoadVector mem))); 12674 effect(TEMP src); 12675 format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} 12676 ins_encode %{ 12677 int vector_len = 0; 12678 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12679 %} 12680 ins_pipe( pipe_slow ); 12681 %} 12682 12683 instruct vsub4S(vecD dst, vecD src) %{ 12684 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 12685 match(Set dst (SubVS dst src)); 12686 format %{ "psubw $dst,$src\t! sub packed4S" %} 12687 ins_encode %{ 12688 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 12689 %} 12690 ins_pipe( pipe_slow ); 12691 %} 12692 12693 instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 12694 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 12695 match(Set dst (SubVS src1 src2)); 12696 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 12697 ins_encode %{ 12698 int vector_len = 0; 12699 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12700 %} 12701 ins_pipe( pipe_slow ); 12702 %} 12703 12704 instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 12705 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 12706 match(Set dst (SubVS src1 src2)); 12707 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed4S" %} 12708 ins_encode %{ 12709 int vector_len = 0; 12710 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12711 %} 12712 ins_pipe( pipe_slow ); 12713 %} 12714 12715 instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 12716 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 12717 match(Set dst (SubVS dst src2)); 12718 effect(TEMP src1); 12719 format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %} 12720 ins_encode %{ 12721 int vector_len = 0; 12722 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12723 %} 12724 ins_pipe( pipe_slow ); 12725 %} 12726 12727 instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{ 12728 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 12729 match(Set dst (SubVS src (LoadVector mem))); 12730 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 12731 ins_encode %{ 12732 int vector_len = 0; 12733 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12734 %} 12735 ins_pipe( pipe_slow ); 12736 %} 12737 12738 instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{ 12739 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 12740 match(Set dst (SubVS src (LoadVector mem))); 12741 format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} 12742 ins_encode %{ 12743 int vector_len = 0; 12744 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12745 %} 12746 ins_pipe( pipe_slow ); 12747 %} 12748 12749 instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 12750 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 12751 match(Set dst (SubVS dst (LoadVector mem))); 12752 effect(TEMP src); 12753 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed4S" %} 12754 ins_encode %{ 12755 int vector_len = 0; 12756 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12757 %} 12758 ins_pipe( pipe_slow ); 12759 %} 12760 12761 instruct vsub8S(vecX dst, vecX src) %{ 12762 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 12763 match(Set dst (SubVS dst src)); 12764 format %{ "psubw $dst,$src\t! sub packed8S" %} 12765 ins_encode %{ 12766 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 12767 %} 12768 ins_pipe( pipe_slow ); 12769 %} 12770 12771 instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 12772 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 12773 match(Set dst (SubVS src1 src2)); 12774 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 12775 ins_encode %{ 12776 int vector_len = 0; 12777 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12778 %} 12779 ins_pipe( pipe_slow ); 12780 %} 12781 12782 instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 12783 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 12784 match(Set dst (SubVS src1 src2)); 12785 format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %} 12786 ins_encode %{ 12787 int vector_len = 0; 12788 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12789 %} 12790 ins_pipe( pipe_slow ); 12791 %} 12792 12793 instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 12794 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 12795 match(Set dst (SubVS dst src2)); 12796 effect(TEMP src1); 12797 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed8S" %} 12798 ins_encode %{ 12799 int vector_len = 0; 12800 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12801 %} 12802 ins_pipe( pipe_slow ); 12803 %} 12804 12805 instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{ 12806 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 12807 match(Set dst (SubVS src (LoadVector mem))); 12808 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 12809 ins_encode %{ 12810 int vector_len = 0; 12811 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12812 %} 12813 ins_pipe( pipe_slow ); 12814 %} 12815 12816 instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{ 12817 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 12818 match(Set dst (SubVS src (LoadVector mem))); 12819 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 12820 ins_encode %{ 12821 int vector_len = 0; 12822 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12823 %} 12824 ins_pipe( pipe_slow ); 12825 %} 12826 12827 instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 12828 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 12829 match(Set dst (SubVS dst (LoadVector mem))); 12830 effect(TEMP src); 12831 format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %} 12832 ins_encode %{ 12833 int vector_len = 0; 12834 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12835 %} 12836 ins_pipe( pipe_slow ); 12837 %} 12838 12839 instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 12840 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 12841 match(Set dst (SubVS src1 src2)); 12842 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed16S" %} 12843 ins_encode %{ 12844 int vector_len = 1; 12845 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12846 %} 12847 ins_pipe( pipe_slow ); 12848 %} 12849 12850 instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 12851 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 12852 match(Set dst (SubVS src1 src2)); 12853 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 12854 ins_encode %{ 12855 int vector_len = 1; 12856 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12857 %} 12858 ins_pipe( pipe_slow ); 12859 %} 12860 12861 instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 12862 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 12863 match(Set dst (SubVS dst src2)); 12864 effect(TEMP src1); 12865 format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %} 12866 ins_encode %{ 12867 int vector_len = 1; 12868 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12869 %} 12870 ins_pipe( pipe_slow ); 12871 %} 12872 12873 instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{ 12874 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 12875 match(Set dst (SubVS src (LoadVector mem))); 12876 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 12877 ins_encode %{ 12878 int vector_len = 1; 12879 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12880 %} 12881 ins_pipe( pipe_slow ); 12882 %} 12883 12884 instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{ 12885 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 12886 match(Set dst (SubVS src (LoadVector mem))); 12887 format %{ "vpsubw $dst,$src,$mem\t! 
sub packed16S" %} 12888 ins_encode %{ 12889 int vector_len = 1; 12890 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12891 %} 12892 ins_pipe( pipe_slow ); 12893 %} 12894 12895 instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 12896 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 12897 match(Set dst (SubVS dst (LoadVector mem))); 12898 effect(TEMP src); 12899 format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %} 12900 ins_encode %{ 12901 int vector_len = 1; 12902 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12903 %} 12904 ins_pipe( pipe_slow ); 12905 %} 12906 12907 instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 12908 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 12909 match(Set dst (SubVS src1 src2)); 12910 format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %} 12911 ins_encode %{ 12912 int vector_len = 2; 12913 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12914 %} 12915 ins_pipe( pipe_slow ); 12916 %} 12917 12918 instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{ 12919 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 12920 match(Set dst (SubVS src (LoadVector mem))); 12921 format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %} 12922 ins_encode %{ 12923 int vector_len = 2; 12924 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12925 %} 12926 ins_pipe( pipe_slow ); 12927 %} 12928 12929 // Integers vector sub 12930 instruct vsub2I(vecD dst, vecD src) %{ 12931 predicate(n->as_Vector()->length() == 2); 12932 match(Set dst (SubVI dst src)); 12933 format %{ "psubd $dst,$src\t! 
sub packed2I" %} 12934 ins_encode %{ 12935 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 12936 %} 12937 ins_pipe( pipe_slow ); 12938 %} 12939 12940 instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ 12941 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12942 match(Set dst (SubVI src1 src2)); 12943 format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %} 12944 ins_encode %{ 12945 int vector_len = 0; 12946 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12947 %} 12948 ins_pipe( pipe_slow ); 12949 %} 12950 12951 instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ 12952 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 12953 match(Set dst (SubVI src (LoadVector mem))); 12954 format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} 12955 ins_encode %{ 12956 int vector_len = 0; 12957 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12958 %} 12959 ins_pipe( pipe_slow ); 12960 %} 12961 12962 instruct vsub4I(vecX dst, vecX src) %{ 12963 predicate(n->as_Vector()->length() == 4); 12964 match(Set dst (SubVI dst src)); 12965 format %{ "psubd $dst,$src\t! sub packed4I" %} 12966 ins_encode %{ 12967 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 12968 %} 12969 ins_pipe( pipe_slow ); 12970 %} 12971 12972 instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ 12973 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 12974 match(Set dst (SubVI src1 src2)); 12975 format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %} 12976 ins_encode %{ 12977 int vector_len = 0; 12978 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12979 %} 12980 ins_pipe( pipe_slow ); 12981 %} 12982 12983 instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{ 12984 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 12985 match(Set dst (SubVI src (LoadVector mem))); 12986 format %{ "vpsubd $dst,$src,$mem\t! 
sub packed4I" %} 12987 ins_encode %{ 12988 int vector_len = 0; 12989 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12990 %} 12991 ins_pipe( pipe_slow ); 12992 %} 12993 12994 instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{ 12995 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 12996 match(Set dst (SubVI src1 src2)); 12997 format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %} 12998 ins_encode %{ 12999 int vector_len = 1; 13000 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13001 %} 13002 ins_pipe( pipe_slow ); 13003 %} 13004 13005 instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{ 13006 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 13007 match(Set dst (SubVI src (LoadVector mem))); 13008 format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %} 13009 ins_encode %{ 13010 int vector_len = 1; 13011 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13012 %} 13013 ins_pipe( pipe_slow ); 13014 %} 13015 13016 instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 13017 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 13018 match(Set dst (SubVI src1 src2)); 13019 format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %} 13020 ins_encode %{ 13021 int vector_len = 2; 13022 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13023 %} 13024 ins_pipe( pipe_slow ); 13025 %} 13026 13027 instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{ 13028 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 13029 match(Set dst (SubVI src (LoadVector mem))); 13030 format %{ "vpsubd $dst,$src,$mem\t! 
sub packed16I" %} 13031 ins_encode %{ 13032 int vector_len = 2; 13033 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13034 %} 13035 ins_pipe( pipe_slow ); 13036 %} 13037 13038 // Longs vector sub 13039 instruct vsub2L(vecX dst, vecX src) %{ 13040 predicate(n->as_Vector()->length() == 2); 13041 match(Set dst (SubVL dst src)); 13042 format %{ "psubq $dst,$src\t! sub packed2L" %} 13043 ins_encode %{ 13044 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 13045 %} 13046 ins_pipe( pipe_slow ); 13047 %} 13048 13049 instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ 13050 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13051 match(Set dst (SubVL src1 src2)); 13052 format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %} 13053 ins_encode %{ 13054 int vector_len = 0; 13055 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13056 %} 13057 ins_pipe( pipe_slow ); 13058 %} 13059 13060 instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{ 13061 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13062 match(Set dst (SubVL src (LoadVector mem))); 13063 format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %} 13064 ins_encode %{ 13065 int vector_len = 0; 13066 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13067 %} 13068 ins_pipe( pipe_slow ); 13069 %} 13070 13071 instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{ 13072 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 13073 match(Set dst (SubVL src1 src2)); 13074 format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %} 13075 ins_encode %{ 13076 int vector_len = 1; 13077 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13078 %} 13079 ins_pipe( pipe_slow ); 13080 %} 13081 13082 instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{ 13083 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 13084 match(Set dst (SubVL src (LoadVector mem))); 13085 format %{ "vpsubq $dst,$src,$mem\t! 
sub packed4L" %} 13086 ins_encode %{ 13087 int vector_len = 1; 13088 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13089 %} 13090 ins_pipe( pipe_slow ); 13091 %} 13092 13093 instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 13094 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 13095 match(Set dst (SubVL src1 src2)); 13096 format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %} 13097 ins_encode %{ 13098 int vector_len = 2; 13099 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13100 %} 13101 ins_pipe( pipe_slow ); 13102 %} 13103 13104 instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{ 13105 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 13106 match(Set dst (SubVL src (LoadVector mem))); 13107 format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %} 13108 ins_encode %{ 13109 int vector_len = 2; 13110 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13111 %} 13112 ins_pipe( pipe_slow ); 13113 %} 13114 13115 // Floats vector sub 13116 instruct vsub2F(vecD dst, vecD src) %{ 13117 predicate(n->as_Vector()->length() == 2); 13118 match(Set dst (SubVF dst src)); 13119 format %{ "subps $dst,$src\t! sub packed2F" %} 13120 ins_encode %{ 13121 __ subps($dst$$XMMRegister, $src$$XMMRegister); 13122 %} 13123 ins_pipe( pipe_slow ); 13124 %} 13125 13126 instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{ 13127 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13128 match(Set dst (SubVF src1 src2)); 13129 format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %} 13130 ins_encode %{ 13131 int vector_len = 0; 13132 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13133 %} 13134 ins_pipe( pipe_slow ); 13135 %} 13136 13137 instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ 13138 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13139 match(Set dst (SubVF src (LoadVector mem))); 13140 format %{ "vsubps $dst,$src,$mem\t! 
sub packed2F" %} 13141 ins_encode %{ 13142 int vector_len = 0; 13143 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13144 %} 13145 ins_pipe( pipe_slow ); 13146 %} 13147 13148 instruct vsub4F(vecX dst, vecX src) %{ 13149 predicate(n->as_Vector()->length() == 4); 13150 match(Set dst (SubVF dst src)); 13151 format %{ "subps $dst,$src\t! sub packed4F" %} 13152 ins_encode %{ 13153 __ subps($dst$$XMMRegister, $src$$XMMRegister); 13154 %} 13155 ins_pipe( pipe_slow ); 13156 %} 13157 13158 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 13159 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13160 match(Set dst (SubVF src1 src2)); 13161 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 13162 ins_encode %{ 13163 int vector_len = 0; 13164 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13165 %} 13166 ins_pipe( pipe_slow ); 13167 %} 13168 13169 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 13170 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13171 match(Set dst (SubVF src (LoadVector mem))); 13172 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 13173 ins_encode %{ 13174 int vector_len = 0; 13175 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13176 %} 13177 ins_pipe( pipe_slow ); 13178 %} 13179 13180 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 13181 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 13182 match(Set dst (SubVF src1 src2)); 13183 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 13184 ins_encode %{ 13185 int vector_len = 1; 13186 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13187 %} 13188 ins_pipe( pipe_slow ); 13189 %} 13190 13191 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 13192 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 13193 match(Set dst (SubVF src (LoadVector mem))); 13194 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %} 13195 ins_encode %{ 13196 int vector_len = 1; 13197 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13198 %} 13199 ins_pipe( pipe_slow ); 13200 %} 13201 13202 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 13203 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 13204 match(Set dst (SubVF src1 src2)); 13205 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 13206 ins_encode %{ 13207 int vector_len = 2; 13208 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13209 %} 13210 ins_pipe( pipe_slow ); 13211 %} 13212 13213 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 13214 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 13215 match(Set dst (SubVF src (LoadVector mem))); 13216 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 13217 ins_encode %{ 13218 int vector_len = 2; 13219 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13220 %} 13221 ins_pipe( pipe_slow ); 13222 %} 13223 13224 // Doubles vector sub 13225 instruct vsub2D(vecX dst, vecX src) %{ 13226 predicate(n->as_Vector()->length() == 2); 13227 match(Set dst (SubVD dst src)); 13228 format %{ "subpd $dst,$src\t! sub packed2D" %} 13229 ins_encode %{ 13230 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 13231 %} 13232 ins_pipe( pipe_slow ); 13233 %} 13234 13235 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 13236 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13237 match(Set dst (SubVD src1 src2)); 13238 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 13239 ins_encode %{ 13240 int vector_len = 0; 13241 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13242 %} 13243 ins_pipe( pipe_slow ); 13244 %} 13245 13246 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 13247 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13248 match(Set dst (SubVD src (LoadVector mem))); 13249 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed2D" %} 13250 ins_encode %{ 13251 int vector_len = 0; 13252 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13253 %} 13254 ins_pipe( pipe_slow ); 13255 %} 13256 13257 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 13258 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13259 match(Set dst (SubVD src1 src2)); 13260 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 13261 ins_encode %{ 13262 int vector_len = 1; 13263 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13264 %} 13265 ins_pipe( pipe_slow ); 13266 %} 13267 13268 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 13269 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13270 match(Set dst (SubVD src (LoadVector mem))); 13271 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 13272 ins_encode %{ 13273 int vector_len = 1; 13274 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13275 %} 13276 ins_pipe( pipe_slow ); 13277 %} 13278 13279 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 13280 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 13281 match(Set dst (SubVD src1 src2)); 13282 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 13283 ins_encode %{ 13284 int vector_len = 2; 13285 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13286 %} 13287 ins_pipe( pipe_slow ); 13288 %} 13289 13290 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 13291 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 13292 match(Set dst (SubVD src (LoadVector mem))); 13293 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed8D" %} 13294 ins_encode %{ 13295 int vector_len = 2; 13296 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13297 %} 13298 ins_pipe( pipe_slow ); 13299 %} 13300 13301 // --------------------------------- MUL -------------------------------------- 13302 13303 // Byte vector mul 13304 13305 instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp2, vecS tmp) %{ 13306 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 13307 match(Set dst (MulVB src1 src2)); 13308 effect(TEMP dst, TEMP tmp2, TEMP tmp); 13309 format %{"pmovsxbw $tmp,$src1\n\t" 13310 "pmovsxbw $tmp2,$src2\n\t" 13311 "pmullw $tmp,$tmp2\n\t" 13312 "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" 13313 "pand $tmp,$tmp2\n\t" 13314 "packuswb $tmp,$tmp\n\t" 13315 "movss $dst,$tmp\t! mul packed4B" %} 13316 ins_encode %{ 13317 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 13318 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 13319 __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); 13320 __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13321 __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); 13322 __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); 13323 __ movss($dst$$XMMRegister, $tmp$$XMMRegister); 13324 %} 13325 ins_pipe( pipe_slow ); 13326 %} 13327 13328 instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp2, vecD tmp) %{ 13329 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); 13330 match(Set dst (MulVB src1 src2)); 13331 effect(TEMP dst, TEMP tmp2, TEMP tmp); 13332 format %{"pmovsxbw $tmp,$src1\n\t" 13333 "pmovsxbw $tmp2,$src2\n\t" 13334 "pmullw $tmp,$tmp2\n\t" 13335 "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" 13336 "pand $tmp,$tmp2\n\t" 13337 "packuswb $tmp,$tmp\n\t" 13338 "movsd $dst,$tmp\t! 
mul packed8B" %} 13339 ins_encode %{ 13340 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 13341 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 13342 __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); 13343 __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13344 __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); 13345 __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); 13346 __ movsd($dst$$XMMRegister, $tmp$$XMMRegister); 13347 %} 13348 ins_pipe( pipe_slow ); 13349 %} 13350 13351 instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp3, vecX tmp2, vecX tmp) %{ 13352 predicate(UseSSE > 3 && n->as_Vector()->length() == 16); 13353 match(Set dst (MulVB src1 src2)); 13354 effect(TEMP tmp3, TEMP tmp2, TEMP tmp); 13355 format %{"pmovsxbw $tmp,$src1\n\t" 13356 "pmovsxbw $tmp2,$src2\n\t" 13357 "pmullw $tmp,$tmp2\n\t" 13358 "pshufd $tmp2,$src1\n\t" 13359 "pshufd $tmp3,$src2\n\t" 13360 "pmovsxbw $tmp2,$tmp2\n\t" 13361 "pmovsxbw $tmp3,$tmp3\n\t" 13362 "pmullw $tmp2,$tmp3\n\t" 13363 "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t" 13364 "pand $tmp,$tmp3\n\t" 13365 "pand $tmp2,$tmp3\n\t" 13366 "packuswb $tmp,$tmp2\n\t" 13367 "movdqu $dst,$tmp \n\t! 
mul packed16B" %} 13368 ins_encode %{ 13369 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 13370 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 13371 __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); 13372 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 238); 13373 __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 238); 13374 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 13375 __ pmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister); 13376 __ pmullw($tmp2$$XMMRegister, $tmp3$$XMMRegister); 13377 __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13378 __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister); 13379 __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister); 13380 __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister); 13381 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 13382 %} 13383 ins_pipe( pipe_slow ); 13384 %} 13385 13386 instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecY tmp2, vecY tmp) %{ 13387 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 13388 match(Set dst (MulVB src1 src2)); 13389 effect(TEMP dst, TEMP tmp2, TEMP tmp); 13390 format %{"vpmovsxbw $tmp,$src1\n\t" 13391 "vpmovsxbw $tmp2,$src2\n\t" 13392 "vpmullw $tmp,$tmp2\n\t" 13393 "vmovdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" 13394 "vpand $tmp,$tmp2\n\t" 13395 "vextracti128_high $tmp2,$tmp\n\t" 13396 "vpackuswb $dst,$tmp, $tmp2\n\t! 
mul packed16B" %} 13397 ins_encode %{ 13398 int vector_len = 1; 13399 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 13400 __ vpmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 13401 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 13402 __ vmovdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13403 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 13404 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 13405 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 13406 %} 13407 ins_pipe( pipe_slow ); 13408 %} 13409 13410 instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, vecY tmp3) %{ 13411 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 13412 match(Set dst (MulVB src1 src2)); 13413 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3); 13414 format %{"vextracti128_high $tmp1,$src1\n\t" 13415 "vextracti128_high $tmp3,$src2\n\t" 13416 "vpmovsxbw $tmp1,$tmp1\n\t" 13417 "vpmovsxbw $tmp3,$tmp3\n\t" 13418 "vpmullw $tmp1,$tmp1,$tmp3\n\t" 13419 "vpmovsxbw $tmp2,$src1\n\t" 13420 "vpmovsxbw $tmp3,$src2\n\t" 13421 "vpmullw $tmp2,$tmp2,$tmp3\n\t" 13422 "vmovdqu $tmp3, [0x00ff00ff0x00ff00ff]\n\t" 13423 "vpbroadcastd $tmp3, $tmp3\n\t" 13424 "vpand $tmp2,$tmp2,$tmp3\n\t" 13425 "vpand $tmp1,$tmp1,$tmp3\n\t" 13426 "vpackuswb $dst,$tmp2,$tmp1\n\t" 13427 "vpermq $dst, $dst, 0xD8\t! 
mul packed32B" %} 13428 ins_encode %{ 13429 int vector_len = 1; 13430 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 13431 __ vextracti128_high($tmp3$$XMMRegister, $src2$$XMMRegister); 13432 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13433 __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13434 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13435 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 13436 __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); 13437 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13438 __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13439 __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister); 13440 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13441 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13442 __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13443 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 13444 %} 13445 ins_pipe( pipe_slow ); 13446 %} 13447 13448 instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, vecZ tmp3, vecZ tmp4) %{ 13449 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 13450 match(Set dst (MulVB src1 src2)); 13451 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4); 13452 format %{"vextracti64x4_high $tmp1,$src1\n\t" 13453 "vextracti64x4_high $tmp3,$src2\n\t" 13454 "vpmovsxbw $tmp1,$tmp1\n\t" 13455 "vpmovsxbw $tmp3,$tmp3\n\t" 13456 "vpmullw $tmp1,$tmp1,$tmp3\n\t" 13457 "vpmovsxbw $tmp2,$src1\n\t" 13458 "vpmovsxbw $tmp3,$src2\n\t" 13459 "vpmullw $tmp2,$tmp2,$tmp3\n\t" 13460 "vmovdqu $tmp3, [0x00ff00ff0x00ff00ff]\n\t" 13461 "evpbroadcastd $tmp3, $tmp3\n\t" 13462 "evpandd $tmp1,$tmp1,$tmp3\n\t" 13463 "evpandd $tmp2,$tmp2,$tmp3\n\t" 13464 "vpackuswb
$tmp1,$tmp2,$tmp1\n\t" 13465 "vextracti64x4_high $tmp3,$tmp1\n\t" 13466 "vpermq $tmp3, $tmp3, 0x8D\n\t" 13467 "vpermq $tmp1, $tmp1, 0xD8\n\t" 13468 "vmovdqu $tmp4,$tmp3\n\t" 13469 "vmovdqu $tmp2,$tmp1\n\t" 13470 "vpblendd $tmp3,$tmp3,$tmp1\n\t" 13471 "vpblendd $tmp2,$tmp2,$tmp4\n\t" 13472 "vpermq $tmp2,$tmp2,0x4E\n\t" 13473 "vinserti64x4 $dst,$dst,$tmp3,0x00\n\t" 13474 "vinserti64x4 $dst,$dst,$tmp2,0x01\t! mul packed64B" %} 13475 ins_encode %{ 13476 int vector_len = 2; 13477 KRegister ktmp = k1; 13478 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 13479 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 13480 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13481 __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13482 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13483 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 13484 __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); 13485 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13486 __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13487 __ evpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13488 __ evpandd($tmp1$$XMMRegister, ktmp, $tmp1$$XMMRegister, $tmp3$$XMMRegister, false, vector_len); 13489 __ evpandd($tmp2$$XMMRegister, ktmp, $tmp2$$XMMRegister, $tmp3$$XMMRegister, false, vector_len); 13490 __ vpackuswb($tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13491 __ vextracti64x4_high($tmp3$$XMMRegister, $tmp1$$XMMRegister); 13492 __ vpermq($tmp3$$XMMRegister, $tmp3$$XMMRegister, 0x8D, 1); 13493 __ vpermq($tmp1$$XMMRegister, $tmp1$$XMMRegister, 0xD8, 1); 13494 __ vmovdqu($tmp4$$XMMRegister, $tmp3$$XMMRegister); 13495 __ vmovdqu($tmp2$$XMMRegister, $tmp1$$XMMRegister); 13496 __ vpblendd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $tmp1$$XMMRegister, 0x0F, 1); 13497 __ 
vpblendd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp4$$XMMRegister, 0x0F, 1); 13498 __ vpermq($tmp2$$XMMRegister, $tmp2$$XMMRegister, 0x4E, 1); 13499 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp3$$XMMRegister, 0x00); 13500 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, 0x01); 13501 %} 13502 ins_pipe( pipe_slow ); 13503 %} 13504 13505 // Shorts/Chars vector mul 13506 instruct vmul2S(vecS dst, vecS src) %{ 13507 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 13508 match(Set dst (MulVS dst src)); 13509 format %{ "pmullw $dst,$src\t! mul packed2S" %} 13510 ins_encode %{ 13511 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 13512 %} 13513 ins_pipe( pipe_slow ); 13514 %} 13515 13516 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 13517 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 13518 match(Set dst (MulVS src1 src2)); 13519 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 13520 ins_encode %{ 13521 int vector_len = 0; 13522 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13523 %} 13524 ins_pipe( pipe_slow ); 13525 %} 13526 13527 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 13528 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 13529 match(Set dst (MulVS src1 src2)); 13530 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 13531 ins_encode %{ 13532 int vector_len = 0; 13533 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13534 %} 13535 ins_pipe( pipe_slow ); 13536 %} 13537 13538 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 13539 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 13540 match(Set dst (MulVS dst src2)); 13541 effect(TEMP src1); 13542 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed2S" %} 13543 ins_encode %{ 13544 int vector_len = 0; 13545 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13546 %} 13547 ins_pipe( pipe_slow ); 13548 %} 13549 13550 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 13551 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 13552 match(Set dst (MulVS src (LoadVector mem))); 13553 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 13554 ins_encode %{ 13555 int vector_len = 0; 13556 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13557 %} 13558 ins_pipe( pipe_slow ); 13559 %} 13560 13561 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 13562 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 13563 match(Set dst (MulVS src (LoadVector mem))); 13564 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 13565 ins_encode %{ 13566 int vector_len = 0; 13567 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13568 %} 13569 ins_pipe( pipe_slow ); 13570 %} 13571 13572 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 13573 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 13574 match(Set dst (MulVS dst (LoadVector mem))); 13575 effect(TEMP src); 13576 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 13577 ins_encode %{ 13578 int vector_len = 0; 13579 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13580 %} 13581 ins_pipe( pipe_slow ); 13582 %} 13583 13584 instruct vmul4S(vecD dst, vecD src) %{ 13585 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 13586 match(Set dst (MulVS dst src)); 13587 format %{ "pmullw $dst,$src\t! 
mul packed4S" %} 13588 ins_encode %{ 13589 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 13590 %} 13591 ins_pipe( pipe_slow ); 13592 %} 13593 13594 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 13595 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 13596 match(Set dst (MulVS src1 src2)); 13597 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 13598 ins_encode %{ 13599 int vector_len = 0; 13600 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13601 %} 13602 ins_pipe( pipe_slow ); 13603 %} 13604 13605 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 13606 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 13607 match(Set dst (MulVS src1 src2)); 13608 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 13609 ins_encode %{ 13610 int vector_len = 0; 13611 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13612 %} 13613 ins_pipe( pipe_slow ); 13614 %} 13615 13616 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 13617 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 13618 match(Set dst (MulVS dst src2)); 13619 effect(TEMP src1); 13620 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 13621 ins_encode %{ 13622 int vector_len = 0; 13623 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13624 %} 13625 ins_pipe( pipe_slow ); 13626 %} 13627 13628 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 13629 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 13630 match(Set dst (MulVS src (LoadVector mem))); 13631 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} 13632 ins_encode %{ 13633 int vector_len = 0; 13634 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13635 %} 13636 ins_pipe( pipe_slow ); 13637 %} 13638 13639 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 13640 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 13641 match(Set dst (MulVS src (LoadVector mem))); 13642 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 13643 ins_encode %{ 13644 int vector_len = 0; 13645 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13646 %} 13647 ins_pipe( pipe_slow ); 13648 %} 13649 13650 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 13651 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 13652 match(Set dst (MulVS dst (LoadVector mem))); 13653 effect(TEMP src); 13654 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 13655 ins_encode %{ 13656 int vector_len = 0; 13657 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13658 %} 13659 ins_pipe( pipe_slow ); 13660 %} 13661 13662 instruct vmul8S(vecX dst, vecX src) %{ 13663 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 13664 match(Set dst (MulVS dst src)); 13665 format %{ "pmullw $dst,$src\t! mul packed8S" %} 13666 ins_encode %{ 13667 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 13668 %} 13669 ins_pipe( pipe_slow ); 13670 %} 13671 13672 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 13673 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 13674 match(Set dst (MulVS src1 src2)); 13675 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 13676 ins_encode %{ 13677 int vector_len = 0; 13678 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13679 %} 13680 ins_pipe( pipe_slow ); 13681 %} 13682 13683 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 13684 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 13685 match(Set dst (MulVS src1 src2)); 13686 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 13687 ins_encode %{ 13688 int vector_len = 0; 13689 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13690 %} 13691 ins_pipe( pipe_slow ); 13692 %} 13693 13694 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 13695 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 13696 match(Set dst (MulVS dst src2)); 13697 effect(TEMP src1); 13698 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 13699 ins_encode %{ 13700 int vector_len = 0; 13701 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13702 %} 13703 ins_pipe( pipe_slow ); 13704 %} 13705 13706 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 13707 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 13708 match(Set dst (MulVS src (LoadVector mem))); 13709 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 13710 ins_encode %{ 13711 int vector_len = 0; 13712 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13713 %} 13714 ins_pipe( pipe_slow ); 13715 %} 13716 13717 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 13718 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 13719 match(Set dst (MulVS src (LoadVector mem))); 13720 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed8S" %} 13721 ins_encode %{ 13722 int vector_len = 0; 13723 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13724 %} 13725 ins_pipe( pipe_slow ); 13726 %} 13727 13728 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 13729 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 13730 match(Set dst (MulVS dst (LoadVector mem))); 13731 effect(TEMP src); 13732 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 13733 ins_encode %{ 13734 int vector_len = 0; 13735 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13736 %} 13737 ins_pipe( pipe_slow ); 13738 %} 13739 13740 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 13741 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 13742 match(Set dst (MulVS src1 src2)); 13743 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 13744 ins_encode %{ 13745 int vector_len = 1; 13746 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13747 %} 13748 ins_pipe( pipe_slow ); 13749 %} 13750 13751 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 13752 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 13753 match(Set dst (MulVS src1 src2)); 13754 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 13755 ins_encode %{ 13756 int vector_len = 1; 13757 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13758 %} 13759 ins_pipe( pipe_slow ); 13760 %} 13761 13762 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 13763 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 13764 match(Set dst (MulVS dst src2)); 13765 effect(TEMP src1); 13766 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %} 13767 ins_encode %{ 13768 int vector_len = 1; 13769 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13770 %} 13771 ins_pipe( pipe_slow ); 13772 %} 13773 13774 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 13775 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 13776 match(Set dst (MulVS src (LoadVector mem))); 13777 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 13778 ins_encode %{ 13779 int vector_len = 1; 13780 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13781 %} 13782 ins_pipe( pipe_slow ); 13783 %} 13784 13785 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 13786 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 13787 match(Set dst (MulVS src (LoadVector mem))); 13788 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 13789 ins_encode %{ 13790 int vector_len = 1; 13791 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13792 %} 13793 ins_pipe( pipe_slow ); 13794 %} 13795 13796 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 13797 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 13798 match(Set dst (MulVS dst (LoadVector mem))); 13799 effect(TEMP src); 13800 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 13801 ins_encode %{ 13802 int vector_len = 1; 13803 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13804 %} 13805 ins_pipe( pipe_slow ); 13806 %} 13807 13808 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 13809 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 13810 match(Set dst (MulVS src1 src2)); 13811 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed32S" %} 13812 ins_encode %{ 13813 int vector_len = 2; 13814 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13815 %} 13816 ins_pipe( pipe_slow ); 13817 %} 13818 13819 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 13820 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 13821 match(Set dst (MulVS src (LoadVector mem))); 13822 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 13823 ins_encode %{ 13824 int vector_len = 2; 13825 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13826 %} 13827 ins_pipe( pipe_slow ); 13828 %} 13829 13830 // Integers vector mul (sse4_1) 13831 instruct vmul2I(vecD dst, vecD src) %{ 13832 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 13833 match(Set dst (MulVI dst src)); 13834 format %{ "pmulld $dst,$src\t! mul packed2I" %} 13835 ins_encode %{ 13836 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 13837 %} 13838 ins_pipe( pipe_slow ); 13839 %} 13840 13841 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 13842 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13843 match(Set dst (MulVI src1 src2)); 13844 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 13845 ins_encode %{ 13846 int vector_len = 0; 13847 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13848 %} 13849 ins_pipe( pipe_slow ); 13850 %} 13851 13852 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 13853 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13854 match(Set dst (MulVI src (LoadVector mem))); 13855 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} 13856 ins_encode %{ 13857 int vector_len = 0; 13858 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13859 %} 13860 ins_pipe( pipe_slow ); 13861 %} 13862 13863 instruct vmul4I(vecX dst, vecX src) %{ 13864 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 13865 match(Set dst (MulVI dst src)); 13866 format %{ "pmulld $dst,$src\t! mul packed4I" %} 13867 ins_encode %{ 13868 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 13869 %} 13870 ins_pipe( pipe_slow ); 13871 %} 13872 13873 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 13874 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13875 match(Set dst (MulVI src1 src2)); 13876 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 13877 ins_encode %{ 13878 int vector_len = 0; 13879 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13880 %} 13881 ins_pipe( pipe_slow ); 13882 %} 13883 13884 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 13885 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13886 match(Set dst (MulVI src (LoadVector mem))); 13887 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 13888 ins_encode %{ 13889 int vector_len = 0; 13890 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13891 %} 13892 ins_pipe( pipe_slow ); 13893 %} 13894 13895 // Long vector mul 13896 13897 instruct mul2L_reg(vecX dst, vecX src2, vecX tmp) %{ 13898 predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && VM_Version::supports_sse4_1()); 13899 match(Set dst (MulVL dst src2)); 13900 effect(TEMP dst, TEMP tmp); 13901 format %{ "pshufd $tmp,$src2, 177\n\t" 13902 "pmulld $tmp,$dst\n\t" 13903 "phaddd $tmp,$tmp\n\t" 13904 "pmovzxdq $tmp,$tmp\n\t" 13905 "psllq $tmp, 32\n\t" 13906 "pmuludq $dst,$src2\n\t" 13907 "paddq $dst,$tmp\n\t! 
mul packed2L" %} 13908 13909 ins_encode %{ 13910 int vector_len = 0; 13911 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); 13912 __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); 13913 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 13914 __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); 13915 __ psllq($tmp$$XMMRegister, 32); 13916 __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); 13917 __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); 13918 %} 13919 ins_pipe( pipe_slow ); 13920 %} 13921 13922 instruct vmul2L_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp) %{ 13923 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && VM_Version::supports_avx()); 13924 match(Set dst (MulVL src1 src2)); 13925 effect(TEMP tmp1, TEMP tmp); 13926 format %{ "vpshufd $tmp,$src2\n\t" 13927 "vpmulld $tmp,$src1,$tmp\n\t" 13928 "vphaddd $tmp,$tmp,$tmp\n\t" 13929 "vpmovzxdq $tmp,$tmp\n\t" 13930 "vpsllq $tmp,$tmp\n\t" 13931 "vpmuludq $tmp1,$src1,$src2\n\t" 13932 "vpaddq $dst,$tmp,$tmp1\t! mul packed2L" %} 13933 ins_encode %{ 13934 int vector_len = 0; 13935 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len); 13936 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len); 13937 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 13938 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 13939 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len); 13940 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13941 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13942 %} 13943 ins_pipe( pipe_slow ); 13944 %} 13945 13946 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 13947 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 13948 match(Set dst (MulVL src1 src2)); 13949 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %} 13950 ins_encode %{ 13951 int vector_len = 0; 13952 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13953 %} 13954 ins_pipe( pipe_slow ); 13955 %} 13956 13957 instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ 13958 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 13959 match(Set dst (MulVL src (LoadVector mem))); 13960 format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} 13961 ins_encode %{ 13962 int vector_len = 0; 13963 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13964 %} 13965 ins_pipe( pipe_slow ); 13966 %} 13967 13968 instruct vmul4L_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp, vecY tmp1) %{ 13969 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && VM_Version::supports_avx2()); 13970 match(Set dst (MulVL src1 src2)); 13971 effect(TEMP tmp1, TEMP tmp); 13972 format %{ "vpshufd $tmp,$src2\n\t" 13973 "vpmulld $tmp,$src1,$tmp\n\t" 13974 "vphaddd $tmp,$tmp,$tmp\n\t" 13975 "vpmovzxdq $tmp,$tmp\n\t" 13976 "vpsllq $tmp,$tmp\n\t" 13977 "vpmuludq $tmp1,$src1,$src2\n\t" 13978 "vpaddq $dst,$tmp,$tmp1\t!
mul packed4L" %} 13979 ins_encode %{ 13980 int vector_len = 1; 13981 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len); 13982 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len); 13983 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 13984 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13985 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 13986 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len); 13987 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13988 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13989 %} 13990 ins_pipe( pipe_slow ); 13991 %} 13992 13993 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 13994 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 13995 match(Set dst (MulVL src1 src2)); 13996 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 13997 ins_encode %{ 13998 int vector_len = 1; 13999 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14000 %} 14001 ins_pipe( pipe_slow ); 14002 %} 14003 14004 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 14005 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 14006 match(Set dst (MulVL src (LoadVector mem))); 14007 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 14008 ins_encode %{ 14009 int vector_len = 1; 14010 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14011 %} 14012 ins_pipe( pipe_slow ); 14013 %} 14014 14015 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 14016 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 14017 match(Set dst (MulVL src1 src2)); 14018 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul8L_reg (needs AVX-512DQ for vpmullq).
instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply packed 8I (256-bit vpmulld, AVX2).
instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul8I_reg.
instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply packed 16I (512-bit, AVX-512).
instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul16I_reg.
instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
// Destructive SSE form: dst *= src (note the match uses dst as both input
// and output).
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Non-destructive three-operand AVX form.
instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul2F_reg.
instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Destructive SSE form for packed 4F.
instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! 
mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply packed 4F (AVX three-operand form).
instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul4F_reg.
instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply packed 8F (256-bit AVX).
instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul8F_reg.
instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! 
mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply packed 16F (512-bit, AVX-512).
instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul16F_reg.
instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
// Destructive SSE2 form: dst *= src.
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Non-destructive three-operand AVX form.
instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul2D_reg.
instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply packed 4D (256-bit AVX).
instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul4D_reg.
instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply packed 8D (512-bit, AVX-512).
// Fixed: dropped the stray " k0" from the format string -- the encoding is
// unmasked, and no other rule in this section mentions the mask register.
instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vmul8D_reg (same " k0" cleanup in the format).
instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! 
mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Conditional move of packed 8F (AVX1/AVX2 only, per UseAVX < 3):
// vcmpps builds a per-lane all-ones/all-zeros mask in dst from the condition
// code, then vblendvps selects src2 where the mask is set, src1 elsewhere.
instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Conditional move of packed 4D, same compare-then-blend scheme using
// vcmppd/vblendvpd.
instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "vblendvpd $dst,$src1,$src2,$dst ! 
vcmovevd\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
// Destructive SSE form: dst /= src.
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Non-destructive three-operand AVX form.
instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vdiv2F_reg.
instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Destructive SSE form for packed 4F.
instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! 
div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Divide packed 4F (AVX three-operand form).
instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vdiv4F_reg.
instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Divide packed 8F (256-bit AVX).
instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vdiv8F_reg.
instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! 
div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Divide packed 16F (512-bit).
// Fixed: the predicate only required UseAVX > 0 although the 512-bit (vecZ)
// encoding needs AVX-512; now UseAVX > 2, matching vmul16F_reg/vmul16F_mem
// and vdiv8D_reg below.
instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vdiv16F_reg (same predicate fix).
instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
// Destructive SSE2 form: dst /= src.
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Non-destructive three-operand AVX form.
instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vdiv2D_reg.
instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! 
div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Divide packed 4D (256-bit AVX).
instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vdiv4D_reg.
instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Divide packed 8D (512-bit, AVX-512).
instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant of vdiv8D_reg.
instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! 
div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Min ---------------------------------------
// Byte vector Min
// Min packed 8B without AVX (pminsb is SSE4.1, hence UseSSE > 3):
// copy src1 into dst, then take the per-lane signed minimum with src2.
instruct min8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src1\n\t"
            "pminsb $dst,$src2\t! " %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ pminsb($dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 8B (AVX non-destructive form, vector_len hard-coded to 0).
instruct min8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsb $dst,$src1,$src2\t! " %}
  ins_encode %{
    __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 16B without AVX (SSE4.1).
instruct min16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src1\n\t"
            "pminsb $dst,$src2\t! 
" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ pminsb($dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 16B (AVX, 128-bit).
instruct min16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsb $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 0;
    __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 32B (AVX2, 256-bit).
instruct min32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsb $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 1;
    __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 64B (512-bit; byte ops need AVX-512BW).
instruct min64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsb $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 2;
    __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

//Short vector Min
// Min packed 4S without AVX (pminsw, SSE2): 64-bit copy of src1 via movsd,
// then per-lane signed minimum with src2.
instruct min4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "movsd $dst,$src1\n\t"
            "pminsw $dst,$src2\t! 
" %}
  ins_encode %{
    __ movsd($dst$$XMMRegister, $src1$$XMMRegister);
    __ pminsw($dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 4S (AVX).
// NOTE(review): effect(TEMP dst) looks unnecessary for the non-destructive
// three-operand vpminsw -- the other *_avx rules in this section omit it;
// confirm.
instruct min4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "vpminsw $dst,$src1,$src2\t! " %}
  ins_encode %{
    __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 8S without AVX (pminsw, SSE2).
instruct min8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src1\n\t"
            "pminsw $dst,$src2\t! " %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ pminsw($dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 8S (AVX, 128-bit).
instruct min8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsw $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 0;
    __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 16S (AVX2, 256-bit).
instruct min16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsw $dst,$src1,$src2\t! 
" %}
  ins_encode %{
    int vector_len = 1;
    __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 32S (512-bit; word ops need AVX-512BW).
instruct min32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsw $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 2;
    __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Int vector Min
// Min packed 2I without AVX (pminsd is SSE4.1): 64-bit copy of src1 via
// movsd, then per-lane signed minimum with src2.
instruct min2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "movsd $dst,$src1\n\t"
            "pminsd $dst,$src2\t! " %}
  ins_encode %{
    __ movsd($dst$$XMMRegister, $src1$$XMMRegister);
    __ pminsd($dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 2I (AVX).
instruct min2I_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsd $dst,$src1,$src2\t! 
" %}
  ins_encode %{
    int vector_len = 0;
    __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 4I without AVX (SSE4.1).
instruct min4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src1\n\t"
            "pminsd $dst,$src2\t! " %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ pminsd($dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 4I (AVX, 128-bit).
instruct min4I_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsd $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 0;
    __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 4I (EVEX 128-bit encoding, needs AVX-512VL).
// NOTE(review): predicate overlaps min4I_reg_avx when UseAVX > 2 -- confirm
// the intended rule-selection order.
instruct min4I_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsd $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 0;
    __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 8I (AVX2, 256-bit).
instruct min8I_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsd $dst,$src1,$src2\t! 
" %}
  ins_encode %{
    int vector_len = 1;
    __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 8I (EVEX 256-bit encoding, needs AVX-512VL).
instruct min8I_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsd $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 1;
    __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 16I (512-bit).
// NOTE(review): supports_avx512vl() should not be required for a full
// 512-bit operation -- confirm.
instruct min16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsd $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vector_len = 2;
    __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long vector Min
// Min packed 1L without AVX: pcmpgtq (SSE4.2-era, hence UseSSE >= 4) builds
// a (src1 > src2) mask in tmp, then blendvpd picks src2 where the mask is
// set -- i.e. the minimum. tmp is pinned to rxmm0 because blendvpd takes
// xmm0 as its implicit mask operand.
instruct minL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{
  predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "movsd $tmp,$src1\n\t"
            "movsd $dst,$src1\n\t"
            "pcmpgtq $tmp,$src2\n\t"
            "blendvpd $dst,$src2\t! 
" %}
  ins_encode %{
    __ movsd($tmp$$XMMRegister, $src1$$XMMRegister);
    __ movsd($dst$$XMMRegister, $src1$$XMMRegister);
    __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister);   // xmm0 = (src1 > src2) mask
    __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister);  // dst = mask ? src2 : src1
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 1L (AVX): vpcmpgtq mask in dst, then vblendvpd selects the
// smaller lane (src2 where src1 > src2, src1 otherwise).
instruct min1L_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vblendvpd $dst,$src1,$src2,$dst\t! " %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 2L without AVX; tmp is rxmm0 (implicit blendvpd mask register).
instruct min2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{
  predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "movdqu $tmp,$src1\n\t"
            "movdqu $dst,$src1\n\t"
            "pcmpgtq $tmp,$src2\n\t"
            "blendvpd $dst,$src2\t! 
" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister);   // xmm0 = (src1 > src2) mask
    __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister);  // dst = mask ? src2 : src1
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 2L (AVX): compare mask, then blend the smaller lanes.
instruct min2L_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vblendvpd $dst,$src1,$src2,$dst\t! " %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 4L (AVX2): same compare-then-blend scheme, 256-bit.
instruct min4L_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vblendvpd $dst,$src1,$src2,$dst\t! " %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 2L with a single vpminsq (EVEX 128-bit encoding, needs
// AVX-512VL).
// Fixed: the format string referenced "src2" without the '$' operand prefix,
// so the disassembly comment would print the literal text instead of the
// register.
instruct min2L_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsq $dst,$src1,$src2\t! 
" %}
  ins_encode %{
    __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 4L (vpminsq, EVEX 256-bit encoding, needs AVX-512VL).
// Fixed: format string referenced "src2" without the '$' operand prefix.
instruct min4L_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsq $dst,$src1,$src2\t! " %}
  ins_encode %{
    __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 8L (vpminsq, 512-bit).
// Fixed: format string referenced "src2" without the '$' operand prefix.
// NOTE(review): supports_avx512vl() should not be required for a full
// 512-bit operation -- confirm.
instruct min8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinV src1 src2));
  format %{ "vpminsq $dst,$src1,$src2\t! " %}
  ins_encode %{
    __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2);
  %}
  ins_pipe( pipe_slow );
%}

// Float vector Min
// Min packed 2F without AVX.
// NOTE(review): minps returns the second operand for NaN operands and for
// +/-0.0 ties -- confirm this matches the MinV node's required semantics.
instruct min2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinV src1 src2));
  effect(TEMP dst);
  format %{ "movsd $dst,$src1\n\t"
            "minps $dst,$src2\t! " %}
  ins_encode %{
    __ movsd($dst$$XMMRegister, $src1$$XMMRegister);
    __ minps($dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Min packed 2F (AVX).
instruct min2F_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinV src1 src2));
  format %{ "vminps $dst,$src1,$src2\t! 
" %} 14847 ins_encode %{ 14848 int vector_len = 0; 14849 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14850 %} 14851 ins_pipe( pipe_slow ); 14852 %} 14853 14854 instruct min4F_reg(vecX dst, vecX src1, vecX src2) %{ 14855 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14856 match(Set dst (MinV src1 src2)); 14857 effect(TEMP dst); 14858 format %{ "movdqu $dst,$src1\n\t" 14859 "minps $dst,$src2\t! " %} 14860 ins_encode %{ 14861 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14862 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 14863 %} 14864 ins_pipe( pipe_slow ); 14865 %} 14866 14867 instruct min4F_reg_avx(vecX dst, vecX src1, vecX src2) %{ 14868 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14869 match(Set dst (MinV src1 src2)); 14870 format %{ "vminps $dst,$src1,$src2\t! " %} 14871 ins_encode %{ 14872 int vector_len = 0; 14873 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14874 %} 14875 ins_pipe( pipe_slow ); 14876 %} 14877 14878 instruct min4F_reg_evex(vecX dst, vecX src1, vecX src2) %{ 14879 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14880 match(Set dst (MinV src1 src2)); 14881 format %{ "vminps $dst,$src1,$src2\t! " %} 14882 ins_encode %{ 14883 int vector_len = 0; 14884 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14885 %} 14886 ins_pipe( pipe_slow ); 14887 %} 14888 14889 instruct min8F_reg_avx(vecY dst, vecY src1, vecY src2) %{ 14890 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14891 match(Set dst (MinV src1 src2)); 14892 format %{ "vminps $dst,$src1,$src2\t! 
" %} 14893 ins_encode %{ 14894 int vector_len = 1; 14895 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14896 %} 14897 ins_pipe( pipe_slow ); 14898 %} 14899 14900 instruct min8F_reg_evex(vecY dst, vecY src1, vecY src2) %{ 14901 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14902 match(Set dst (MinV src1 src2)); 14903 format %{ "vminps $dst,$src1,$src2\t! " %} 14904 ins_encode %{ 14905 int vector_len = 1; 14906 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14907 %} 14908 ins_pipe( pipe_slow ); 14909 %} 14910 14911 instruct min16F_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 14912 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14913 match(Set dst (MinV src1 src2)); 14914 format %{ "vminps $dst,$src1,$src2\t! " %} 14915 ins_encode %{ 14916 int vector_len = 2; 14917 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14918 %} 14919 ins_pipe( pipe_slow ); 14920 %} 14921 14922 // Double vector Min 14923 instruct minD_reg(vecD dst, vecD src1, vecD src2) %{ 14924 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14925 match(Set dst (MinV src1 src2)); 14926 effect(TEMP dst); 14927 format %{ "movsd $dst,$src1\n\t" 14928 "minpd $dst,$src2\t! 
" %} 14929 ins_encode %{ 14930 int vector_len = 0; 14931 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 14932 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 14933 %} 14934 ins_pipe( pipe_slow ); 14935 %} 14936 14937 instruct min1D_reg_avx(vecD dst, vecD src1, vecD src2) %{ 14938 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14939 match(Set dst (MinV src1 src2)); 14940 format %{ "vminpd $dst,$src1,$src2\t! " %} 14941 ins_encode %{ 14942 int vector_len = 0; 14943 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14944 %} 14945 ins_pipe( pipe_slow ); 14946 %} 14947 14948 instruct min2D_reg(vecX dst, vecX src1, vecX src2) %{ 14949 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14950 match(Set dst (MinV src1 src2)); 14951 effect(TEMP dst); 14952 format %{ "movdqu $dst,$src1\n\t" 14953 "minpd $dst,$src2\t! " %} 14954 ins_encode %{ 14955 int vector_len = 0; 14956 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14957 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 14958 %} 14959 ins_pipe( pipe_slow ); 14960 %} 14961 14962 instruct min2D_reg_avx(vecX dst, vecX src1, vecX src2) %{ 14963 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14964 match(Set dst (MinV src1 src2)); 14965 format %{ "vminpd $dst,$src1,$src2\t! 
" %} 14966 ins_encode %{ 14967 int vector_len = 0; 14968 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14969 %} 14970 ins_pipe( pipe_slow ); 14971 %} 14972 14973 instruct min2D_reg_evex(vecX dst, vecX src1, vecX src2) %{ 14974 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14975 match(Set dst (MinV src1 src2)); 14976 format %{ "vminpd $dst,$src1,$src2\t! " %} 14977 ins_encode %{ 14978 int vector_len = 0; 14979 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14980 %} 14981 ins_pipe( pipe_slow ); 14982 %} 14983 14984 instruct min4D_reg_avx(vecY dst, vecY src1, vecY src2) %{ 14985 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14986 match(Set dst (MinV src1 src2)); 14987 format %{ "vminpd $dst,$src1,$src2\t! " %} 14988 ins_encode %{ 14989 int vector_len = 1; 14990 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14991 %} 14992 ins_pipe( pipe_slow ); 14993 %} 14994 14995 instruct min4D_reg_evex(vecY dst, vecY src1, vecY src2) %{ 14996 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14997 match(Set dst (MinV src1 src2)); 14998 format %{ "vminpd $dst,$src1,$src2\t! " %} 14999 ins_encode %{ 15000 int vector_len = 1; 15001 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15002 %} 15003 ins_pipe( pipe_slow ); 15004 %} 15005 15006 instruct min8D_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15007 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15008 match(Set dst (MinV src1 src2)); 15009 format %{ "vminpd $dst,$src1,$src2\t! 
" %} 15010 ins_encode %{ 15011 int vector_len = 2; 15012 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15013 %} 15014 ins_pipe( pipe_slow ); 15015 %} 15016 15017 // ------------------------------ Max --------------------------------------- 15018 // Byte vector Max 15019 instruct max8B_reg(vecD dst, vecD src1, vecD src2) %{ 15020 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15021 match(Set dst (MaxV src1 src2)); 15022 effect(TEMP dst); 15023 format %{ "movsd $dst,$src1\n\t" 15024 "pmaxsb $dst,$src2\t! " %} 15025 ins_encode %{ 15026 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 15027 __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); 15028 %} 15029 ins_pipe( pipe_slow ); 15030 %} 15031 15032 instruct max8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15033 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15034 match(Set dst (MaxV src1 src2)); 15035 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 15036 ins_encode %{ 15037 int vector_len = 0; 15038 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15039 %} 15040 ins_pipe( pipe_slow ); 15041 %} 15042 15043 instruct max16B_reg(vecX dst, vecX src1, vecX src2) %{ 15044 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15045 match(Set dst (MaxV src1 src2)); 15046 effect(TEMP dst); 15047 format %{ "movdqu $dst,$src1\n\t" 15048 "pmaxsb $dst,$src2\t! 
" %} 15049 ins_encode %{ 15050 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15051 __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); 15052 %} 15053 ins_pipe( pipe_slow ); 15054 %} 15055 15056 instruct max16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15057 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15058 match(Set dst (MaxV src1 src2)); 15059 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 15060 ins_encode %{ 15061 int vector_len = 0; 15062 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15063 %} 15064 ins_pipe( pipe_slow ); 15065 %} 15066 15067 instruct max32B_reg(vecY dst, vecY src1, vecY src2) %{ 15068 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15069 match(Set dst (MaxV src1 src2)); 15070 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 15071 ins_encode %{ 15072 int vector_len = 1; 15073 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15074 %} 15075 ins_pipe( pipe_slow ); 15076 %} 15077 15078 instruct max64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15079 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15080 match(Set dst (MaxV src1 src2)); 15081 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 15082 ins_encode %{ 15083 int vector_len = 2; 15084 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15085 %} 15086 ins_pipe( pipe_slow ); 15087 %} 15088 15089 //Short vector Max 15090 instruct max4S_reg(vecD dst, vecD src1, vecD src2) %{ 15091 predicate(UseSSE > 1 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15092 match(Set dst (MaxV src1 src2)); 15093 effect(TEMP dst); 15094 format %{ "movsd $dst,$src1\n\t" 15095 "pmaxsw $dst,$src2\t! 
" %} 15096 ins_encode %{ 15097 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 15098 __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); 15099 %} 15100 ins_pipe( pipe_slow ); 15101 %} 15102 15103 instruct max4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15104 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15105 match(Set dst (MaxV src1 src2)); 15106 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 15107 ins_encode %{ 15108 int vector_len = 0; 15109 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15110 %} 15111 ins_pipe( pipe_slow ); 15112 %} 15113 15114 instruct max8S_reg(vecX dst, vecX src1, vecX src2) %{ 15115 predicate(UseSSE > 1 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15116 match(Set dst (MaxV src1 src2)); 15117 effect(TEMP dst); 15118 format %{ "movdqu $dst,$src1\n\t" 15119 "pmaxsw $dst,$src2\t! " %} 15120 ins_encode %{ 15121 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15122 __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); 15123 %} 15124 ins_pipe( pipe_slow ); 15125 %} 15126 15127 instruct max8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15128 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15129 match(Set dst (MaxV src1 src2)); 15130 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 15131 ins_encode %{ 15132 int vector_len = 0; 15133 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15134 %} 15135 ins_pipe( pipe_slow ); 15136 %} 15137 15138 instruct max16S_reg(vecY dst, vecY src1, vecY src2) %{ 15139 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15140 match(Set dst (MaxV src1 src2)); 15141 format %{ "vpmaxsw $dst,$src1,$src2\t! 
" %} 15142 ins_encode %{ 15143 int vector_len = 1; 15144 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15145 %} 15146 ins_pipe( pipe_slow ); 15147 %} 15148 15149 instruct max32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15150 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15151 match(Set dst (MaxV src1 src2)); 15152 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 15153 ins_encode %{ 15154 int vector_len = 2; 15155 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15156 %} 15157 ins_pipe( pipe_slow ); 15158 %} 15159 15160 // Int vector Max 15161 instruct max2I_reg(vecD dst, vecD src1, vecD src2) %{ 15162 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15163 match(Set dst (MaxV src1 src2)); 15164 effect(TEMP dst); 15165 format %{ "movdqu $dst,$src1\n\t" 15166 "pmaxsd $dst,$src2\t! " %} 15167 ins_encode %{ 15168 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15169 __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); 15170 %} 15171 ins_pipe( pipe_slow ); 15172 %} 15173 15174 instruct max2I_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15175 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15176 match(Set dst (MaxV src1 src2)); 15177 format %{ "vpmaxsd $dst,$src1,$src2\t! 
" %} 15178 ins_encode %{ 15179 int vector_len = 0; 15180 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15181 %} 15182 ins_pipe( pipe_slow ); 15183 %} 15184 15185 instruct max4I_reg(vecX dst, vecX src1, vecX src2) %{ 15186 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15187 match(Set dst (MaxV src1 src2)); 15188 effect(TEMP dst); 15189 format %{ "movdqu $dst,$src1\n\t" 15190 "pmaxsd $dst,$src2\t! " %} 15191 ins_encode %{ 15192 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15193 __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); 15194 %} 15195 ins_pipe( pipe_slow ); 15196 %} 15197 15198 instruct max4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15199 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15200 match(Set dst (MaxV src1 src2)); 15201 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 15202 ins_encode %{ 15203 int vector_len = 0; 15204 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15205 %} 15206 ins_pipe( pipe_slow ); 15207 %} 15208 15209 instruct max4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ 15210 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15211 match(Set dst (MaxV src1 src2)); 15212 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 15213 ins_encode %{ 15214 int vector_len = 0; 15215 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15216 %} 15217 ins_pipe( pipe_slow ); 15218 %} 15219 15220 instruct max8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ 15221 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15222 match(Set dst (MaxV src1 src2)); 15223 format %{ "vpmaxsd $dst,$src1,$src2\t! 
" %} 15224 ins_encode %{ 15225 int vector_len = 1; 15226 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15227 %} 15228 ins_pipe( pipe_slow ); 15229 %} 15230 15231 instruct max8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ 15232 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15233 match(Set dst (MaxV src1 src2)); 15234 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 15235 ins_encode %{ 15236 int vector_len = 1; 15237 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15238 %} 15239 ins_pipe( pipe_slow ); 15240 %} 15241 15242 instruct max16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15243 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15244 match(Set dst (MaxV src1 src2)); 15245 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 15246 ins_encode %{ 15247 int vector_len = 2; 15248 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15249 %} 15250 ins_pipe( pipe_slow ); 15251 %} 15252 15253 // Long Vector Max 15254 instruct maxL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ 15255 predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15256 match(Set dst (MaxV src1 src2)); 15257 effect(TEMP dst, TEMP tmp); 15258 format %{ "movsd $tmp,$src1\n\t" 15259 "movsd $dst,$src1\n\t" 15260 "pcmpgtq $tmp,$src2\n\t" 15261 "blendvpd $dst,$src2\t! 
" %} 15262 ins_encode %{ 15263 __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); 15264 __ movsd($dst$$XMMRegister, $src2$$XMMRegister); 15265 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 15266 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister); 15267 %} 15268 ins_pipe( pipe_slow ); 15269 %} 15270 15271 instruct max1L_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15272 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15273 match(Set dst (MaxV src1 src2)); 15274 effect(TEMP dst); 15275 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 15276 "vblendvpd $dst,$src2,$src1,$dst\t! " %} 15277 ins_encode %{ 15278 int vector_len = 0; 15279 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15280 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 15281 %} 15282 ins_pipe( pipe_slow ); 15283 %} 15284 15285 instruct max2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ 15286 predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15287 match(Set dst (MaxV src1 src2)); 15288 effect(TEMP dst, TEMP tmp); 15289 format %{ "movdqu $tmp,$src2\n\t" 15290 "movdqu $dst,$src1\n\t" 15291 "pcmpgtq $tmp,$src1\n\t" 15292 "blendvpd $dst,$src2\t! 
" %} 15293 ins_encode %{ 15294 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 15295 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15296 __ pcmpgtq($tmp$$XMMRegister, $src1$$XMMRegister); 15297 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 15298 %} 15299 ins_pipe( pipe_slow ); 15300 %} 15301 15302 instruct max2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15303 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15304 match(Set dst (MaxV src1 src2)); 15305 effect(TEMP dst); 15306 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 15307 "vblendvpd $dst,$src2,$src1,$dst\t! " %} 15308 ins_encode %{ 15309 int vector_len = 0; 15310 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15311 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 15312 %} 15313 ins_pipe( pipe_slow ); 15314 %} 15315 15316 instruct max2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ 15317 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15318 match(Set dst (MaxV src1 src2)); 15319 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 15320 ins_encode %{ 15321 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 15322 %} 15323 ins_pipe( pipe_slow ); 15324 %} 15325 15326 instruct max4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ 15327 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15328 match(Set dst (MaxV src1 src2)); 15329 effect(TEMP dst); 15330 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 15331 "vblendvpd $dst,$src2,$src1,$dst\t! 
" %} 15332 ins_encode %{ 15333 int vector_len = 1; 15334 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15335 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 15336 %} 15337 ins_pipe( pipe_slow ); 15338 %} 15339 15340 instruct max4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ 15341 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15342 match(Set dst (MaxV src1 src2)); 15343 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 15344 ins_encode %{ 15345 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 15346 %} 15347 ins_pipe( pipe_slow ); 15348 %} 15349 15350 instruct max8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15351 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15352 match(Set dst (MaxV src1 src2)); 15353 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 15354 ins_encode %{ 15355 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2); 15356 %} 15357 ins_pipe( pipe_slow ); 15358 %} 15359 15360 // Float Vector Max 15361 instruct max2F_reg(vecD dst, vecD src1, vecD src2) %{ 15362 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15363 match(Set dst (MaxV src1 src2)); 15364 effect(TEMP dst); 15365 format %{ "movsd $dst,$src1\n\t" 15366 "maxps $dst,$src2\t! 
" %} 15367 ins_encode %{ 15368 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 15369 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 15370 %} 15371 ins_pipe( pipe_slow ); 15372 %} 15373 15374 instruct max2F_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15375 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15376 match(Set dst (MaxV src1 src2)); 15377 format %{ "vmaxps $dst,$src1,$src2\t! " %} 15378 ins_encode %{ 15379 int vector_len = 0; 15380 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15381 %} 15382 ins_pipe( pipe_slow ); 15383 %} 15384 15385 instruct max4F_reg(vecX dst, vecX src1, vecX src2) %{ 15386 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15387 match(Set dst (MaxV src1 src2)); 15388 effect(TEMP dst); 15389 format %{ "movdqu $dst,$src1\n\t" 15390 "maxps $dst,$src2\t! " %} 15391 ins_encode %{ 15392 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15393 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 15394 %} 15395 ins_pipe( pipe_slow ); 15396 %} 15397 15398 instruct max4F_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15399 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15400 match(Set dst (MaxV src1 src2)); 15401 format %{ "vmaxps $dst,$src1,$src2\t! " %} 15402 ins_encode %{ 15403 int vector_len = 0; 15404 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15405 %} 15406 ins_pipe( pipe_slow ); 15407 %} 15408 15409 instruct max4F_reg_evex(vecX dst, vecX src1, vecX src2) %{ 15410 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15411 match(Set dst (MaxV src1 src2)); 15412 format %{ "vmaxps $dst,$src1,$src2\t! 
" %} 15413 ins_encode %{ 15414 int vector_len = 0; 15415 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15416 %} 15417 ins_pipe( pipe_slow ); 15418 %} 15419 15420 instruct max8F_reg_avx(vecY dst, vecY src1, vecY src2) %{ 15421 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15422 match(Set dst (MaxV src1 src2)); 15423 format %{ "vmaxps $dst,$src1,$src2\t! " %} 15424 ins_encode %{ 15425 int vector_len = 1; 15426 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15427 %} 15428 ins_pipe( pipe_slow ); 15429 %} 15430 15431 instruct max8F_reg_evex(vecY dst, vecY src1, vecY src2) %{ 15432 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15433 match(Set dst (MaxV src1 src2)); 15434 format %{ "vmaxps $dst,$src1,$src2\t! " %} 15435 ins_encode %{ 15436 int vector_len = 1; 15437 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15438 %} 15439 ins_pipe( pipe_slow ); 15440 %} 15441 15442 instruct max16F_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15443 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15444 match(Set dst (MaxV src1 src2)); 15445 format %{ "vmaxps $dst,$src1,$src2\t! 
" %} 15446 ins_encode %{ 15447 int vector_len = 2; 15448 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15449 %} 15450 ins_pipe( pipe_slow ); 15451 %} 15452 15453 // Double Vector Max 15454 instruct maxD_reg(vecD dst, vecD src1, vecD src2) %{ 15455 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15456 match(Set dst (MaxV src1 src2)); 15457 effect(TEMP dst); 15458 format %{ "movsd $dst,$src1\n\t" 15459 "maxpd $dst,$src2\t! " %} 15460 ins_encode %{ 15461 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 15462 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 15463 %} 15464 ins_pipe( pipe_slow ); 15465 %} 15466 15467 instruct max1D_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15468 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15469 match(Set dst (MaxV src1 src2)); 15470 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 15471 ins_encode %{ 15472 int vector_len = 0; 15473 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15474 %} 15475 ins_pipe( pipe_slow ); 15476 %} 15477 15478 instruct max2D_reg(vecX dst, vecX src1, vecX src2) %{ 15479 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15480 match(Set dst (MaxV src1 src2)); 15481 effect(TEMP dst); 15482 format %{ "movdqu $dst,$src1\n\t" 15483 "maxpd $dst,$src2\t! " %} 15484 ins_encode %{ 15485 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15486 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 15487 %} 15488 ins_pipe( pipe_slow ); 15489 %} 15490 15491 instruct max2D_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15492 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15493 match(Set dst (MaxV src1 src2)); 15494 format %{ "vmaxpd $dst,$src1,$src2\t! 
" %} 15495 ins_encode %{ 15496 int vector_len = 0; 15497 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15498 %} 15499 ins_pipe( pipe_slow ); 15500 %} 15501 15502 instruct max2D_reg_evex(vecX dst, vecX src1, vecX src2) %{ 15503 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15504 match(Set dst (MaxV src1 src2)); 15505 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 15506 ins_encode %{ 15507 int vector_len = 0; 15508 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15509 %} 15510 ins_pipe( pipe_slow ); 15511 %} 15512 15513 instruct max4D_reg_avx(vecY dst, vecY src1, vecY src2) %{ 15514 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15515 match(Set dst (MaxV src1 src2)); 15516 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 15517 ins_encode %{ 15518 int vector_len = 1; 15519 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15520 %} 15521 ins_pipe( pipe_slow ); 15522 %} 15523 15524 instruct max4D_reg_evex(vecY dst, vecY src1, vecY src2) %{ 15525 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15526 match(Set dst (MaxV src1 src2)); 15527 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 15528 ins_encode %{ 15529 int vector_len = 1; 15530 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15531 %} 15532 ins_pipe( pipe_slow ); 15533 %} 15534 15535 instruct max8D_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15536 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15537 match(Set dst (MaxV src1 src2)); 15538 format %{ "vmaxpd $dst,$src1,$src2\t! 
" %} 15539 ins_encode %{ 15540 int vector_len = 2; 15541 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15542 %} 15543 ins_pipe( pipe_slow ); 15544 %} 15545 15546 // ------------------------------ Shift --------------------------------------- 15547 15548 // Left and right shift count vectors are the same on x86 15549 // (only lowest bits of xmm reg are used for count). 15550 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 15551 match(Set dst (LShiftCntV cnt)); 15552 match(Set dst (RShiftCntV cnt)); 15553 format %{ "movd $dst,$cnt\t! load shift count" %} 15554 ins_encode %{ 15555 __ movdl($dst$$XMMRegister, $cnt$$Register); 15556 %} 15557 ins_pipe( pipe_slow ); 15558 %} 15559 15560 // --------------------------------- Sqrt -------------------------------------- 15561 15562 // Floating point vector sqrt 15563 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 15564 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 15565 match(Set dst (SqrtVD src)); 15566 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 15567 ins_encode %{ 15568 int vector_len = 0; 15569 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 15570 %} 15571 ins_pipe( pipe_slow ); 15572 %} 15573 15574 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 15575 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 15576 match(Set dst (SqrtVD (LoadVector mem))); 15577 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 15578 ins_encode %{ 15579 int vector_len = 0; 15580 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 15581 %} 15582 ins_pipe( pipe_slow ); 15583 %} 15584 15585 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 15586 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 15587 match(Set dst (SqrtVD src)); 15588 format %{ "vsqrtpd $dst,$src\t! 
sqrt packed4D" %} 15589 ins_encode %{ 15590 int vector_len = 1; 15591 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 15592 %} 15593 ins_pipe( pipe_slow ); 15594 %} 15595 15596 instruct vsqrt4D_mem(vecY dst, memory mem) %{ 15597 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 15598 match(Set dst (SqrtVD (LoadVector mem))); 15599 format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %} 15600 ins_encode %{ 15601 int vector_len = 1; 15602 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 15603 %} 15604 ins_pipe( pipe_slow ); 15605 %} 15606 15607 instruct vsqrt8D_reg(vecZ dst, vecZ src) %{ 15608 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 15609 match(Set dst (SqrtVD src)); 15610 format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %} 15611 ins_encode %{ 15612 int vector_len = 2; 15613 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 15614 %} 15615 ins_pipe( pipe_slow ); 15616 %} 15617 15618 instruct vsqrt8D_mem(vecZ dst, memory mem) %{ 15619 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 15620 match(Set dst (SqrtVD (LoadVector mem))); 15621 format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %} 15622 ins_encode %{ 15623 int vector_len = 2; 15624 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 15625 %} 15626 ins_pipe( pipe_slow ); 15627 %} 15628 15629 instruct vsqrt2F_reg(vecD dst, vecD src) %{ 15630 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 15631 match(Set dst (SqrtVF src)); 15632 format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %} 15633 ins_encode %{ 15634 int vector_len = 0; 15635 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 15636 %} 15637 ins_pipe( pipe_slow ); 15638 %} 15639 15640 instruct vsqrt2F_mem(vecD dst, memory mem) %{ 15641 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 15642 match(Set dst (SqrtVF (LoadVector mem))); 15643 format %{ "vsqrtps $dst,$mem\t! 
sqrt packed2F" %} 15644 ins_encode %{ 15645 int vector_len = 0; 15646 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 15647 %} 15648 ins_pipe( pipe_slow ); 15649 %} 15650 15651 instruct vsqrt4F_reg(vecX dst, vecX src) %{ 15652 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 15653 match(Set dst (SqrtVF src)); 15654 format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %} 15655 ins_encode %{ 15656 int vector_len = 0; 15657 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 15658 %} 15659 ins_pipe( pipe_slow ); 15660 %} 15661 15662 instruct vsqrt4F_mem(vecX dst, memory mem) %{ 15663 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 15664 match(Set dst (SqrtVF (LoadVector mem))); 15665 format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %} 15666 ins_encode %{ 15667 int vector_len = 0; 15668 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 15669 %} 15670 ins_pipe( pipe_slow ); 15671 %} 15672 15673 instruct vsqrt8F_reg(vecY dst, vecY src) %{ 15674 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 15675 match(Set dst (SqrtVF src)); 15676 format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %} 15677 ins_encode %{ 15678 int vector_len = 1; 15679 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 15680 %} 15681 ins_pipe( pipe_slow ); 15682 %} 15683 15684 instruct vsqrt8F_mem(vecY dst, memory mem) %{ 15685 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 15686 match(Set dst (SqrtVF (LoadVector mem))); 15687 format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %} 15688 ins_encode %{ 15689 int vector_len = 1; 15690 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 15691 %} 15692 ins_pipe( pipe_slow ); 15693 %} 15694 15695 instruct vsqrt16F_reg(vecZ dst, vecZ src) %{ 15696 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 15697 match(Set dst (SqrtVF src)); 15698 format %{ "vsqrtps $dst,$src\t! 
sqrt packed16F" %} 15699 ins_encode %{ 15700 int vector_len = 2; 15701 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 15702 %} 15703 ins_pipe( pipe_slow ); 15704 %} 15705 15706 instruct vsqrt16F_mem(vecZ dst, memory mem) %{ 15707 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 15708 match(Set dst (SqrtVF (LoadVector mem))); 15709 format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %} 15710 ins_encode %{ 15711 int vector_len = 2; 15712 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len); 15713 %} 15714 ins_pipe( pipe_slow ); 15715 %} 15716 15717 // ------------------------------ LeftShift ----------------------------------- 15718 15719 // Shorts/Chars vector left shift 15720 instruct vsll2S(vecS dst, vecS shift) %{ 15721 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 15722 match(Set dst (LShiftVS dst shift)); 15723 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 15724 ins_encode %{ 15725 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 15726 %} 15727 ins_pipe( pipe_slow ); 15728 %} 15729 15730 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 15731 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 15732 match(Set dst (LShiftVS dst shift)); 15733 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 15734 ins_encode %{ 15735 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 15736 %} 15737 ins_pipe( pipe_slow ); 15738 %} 15739 15740 instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 15741 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 15742 match(Set dst (LShiftVS src shift)); 15743 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} 15744 ins_encode %{ 15745 int vector_len = 0; 15746 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15747 %} 15748 ins_pipe( pipe_slow ); 15749 %} 15750 15751 instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 15752 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 15753 match(Set dst (LShiftVS src shift)); 15754 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 15755 ins_encode %{ 15756 int vector_len = 0; 15757 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15758 %} 15759 ins_pipe( pipe_slow ); 15760 %} 15761 15762 instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 15763 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 15764 match(Set dst (LShiftVS dst shift)); 15765 effect(TEMP src); 15766 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 15767 ins_encode %{ 15768 int vector_len = 0; 15769 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15770 %} 15771 ins_pipe( pipe_slow ); 15772 %} 15773 15774 instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 15775 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 15776 match(Set dst (LShiftVS src shift)); 15777 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 15778 ins_encode %{ 15779 int vector_len = 0; 15780 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15781 %} 15782 ins_pipe( pipe_slow ); 15783 %} 15784 15785 instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 15786 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 15787 match(Set dst (LShiftVS src shift)); 15788 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed2S" %} 15789 ins_encode %{ 15790 int vector_len = 0; 15791 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15792 %} 15793 ins_pipe( pipe_slow ); 15794 %} 15795 15796 instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 15797 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 15798 match(Set dst (LShiftVS dst shift)); 15799 effect(TEMP src); 15800 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 15801 ins_encode %{ 15802 int vector_len = 0; 15803 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15804 %} 15805 ins_pipe( pipe_slow ); 15806 %} 15807 15808 instruct vsll4S(vecD dst, vecS shift) %{ 15809 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 15810 match(Set dst (LShiftVS dst shift)); 15811 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 15812 ins_encode %{ 15813 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 15814 %} 15815 ins_pipe( pipe_slow ); 15816 %} 15817 15818 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 15819 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 15820 match(Set dst (LShiftVS dst shift)); 15821 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 15822 ins_encode %{ 15823 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 15824 %} 15825 ins_pipe( pipe_slow ); 15826 %} 15827 15828 instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 15829 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 15830 match(Set dst (LShiftVS src shift)); 15831 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 15832 ins_encode %{ 15833 int vector_len = 0; 15834 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15835 %} 15836 ins_pipe( pipe_slow ); 15837 %} 15838 15839 instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 15840 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 15841 match(Set dst (LShiftVS src shift)); 15842 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 15843 ins_encode %{ 15844 int vector_len = 0; 15845 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15846 %} 15847 ins_pipe( pipe_slow ); 15848 %} 15849 15850 instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 15851 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 15852 match(Set dst (LShiftVS dst shift)); 15853 effect(TEMP src); 15854 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 15855 ins_encode %{ 15856 int vector_len = 0; 15857 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15858 %} 15859 ins_pipe( pipe_slow ); 15860 %} 15861 15862 instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 15863 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 15864 match(Set dst (LShiftVS src shift)); 15865 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 15866 ins_encode %{ 15867 int vector_len = 0; 15868 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15869 %} 15870 ins_pipe( pipe_slow ); 15871 %} 15872 15873 instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 15874 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 15875 match(Set dst (LShiftVS src shift)); 15876 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 15877 ins_encode %{ 15878 int vector_len = 0; 15879 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15880 %} 15881 ins_pipe( pipe_slow ); 15882 %} 15883 15884 instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 15885 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 15886 match(Set dst (LShiftVS dst shift)); 15887 effect(TEMP src); 15888 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 15889 ins_encode %{ 15890 int vector_len = 0; 15891 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15892 %} 15893 ins_pipe( pipe_slow ); 15894 %} 15895 15896 instruct vsll8S(vecX dst, vecS shift) %{ 15897 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 15898 match(Set dst (LShiftVS dst shift)); 15899 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 15900 ins_encode %{ 15901 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 15902 %} 15903 ins_pipe( pipe_slow ); 15904 %} 15905 15906 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 15907 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 15908 match(Set dst (LShiftVS dst shift)); 15909 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 15910 ins_encode %{ 15911 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 15912 %} 15913 ins_pipe( pipe_slow ); 15914 %} 15915 15916 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 15917 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 15918 match(Set dst (LShiftVS src shift)); 15919 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 15920 ins_encode %{ 15921 int vector_len = 0; 15922 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15923 %} 15924 ins_pipe( pipe_slow ); 15925 %} 15926 15927 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 15928 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 15929 match(Set dst (LShiftVS src shift)); 15930 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 15931 ins_encode %{ 15932 int vector_len = 0; 15933 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15934 %} 15935 ins_pipe( pipe_slow ); 15936 %} 15937 15938 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 15939 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 15940 match(Set dst (LShiftVS dst shift)); 15941 effect(TEMP src); 15942 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 15943 ins_encode %{ 15944 int vector_len = 0; 15945 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15946 %} 15947 ins_pipe( pipe_slow ); 15948 %} 15949 15950 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 15951 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 15952 match(Set dst (LShiftVS src shift)); 15953 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 15954 ins_encode %{ 15955 int vector_len = 0; 15956 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15957 %} 15958 ins_pipe( pipe_slow ); 15959 %} 15960 15961 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 15962 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 15963 match(Set dst (LShiftVS src shift)); 15964 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 15965 ins_encode %{ 15966 int vector_len = 0; 15967 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15968 %} 15969 ins_pipe( pipe_slow ); 15970 %} 15971 15972 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 15973 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 15974 match(Set dst (LShiftVS dst shift)); 15975 effect(TEMP src); 15976 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 15977 ins_encode %{ 15978 int vector_len = 0; 15979 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 15980 %} 15981 ins_pipe( pipe_slow ); 15982 %} 15983 15984 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 15985 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 15986 match(Set dst (LShiftVS src shift)); 15987 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 15988 ins_encode %{ 15989 int vector_len = 1; 15990 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 15991 %} 15992 ins_pipe( pipe_slow ); 15993 %} 15994 15995 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 15996 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 15997 match(Set dst (LShiftVS src shift)); 15998 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 15999 ins_encode %{ 16000 int vector_len = 1; 16001 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16002 %} 16003 ins_pipe( pipe_slow ); 16004 %} 16005 16006 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 16007 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 16008 match(Set dst (LShiftVS dst shift)); 16009 effect(TEMP src); 16010 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 16011 ins_encode %{ 16012 int vector_len = 1; 16013 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16014 %} 16015 ins_pipe( pipe_slow ); 16016 %} 16017 16018 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 16019 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 16020 match(Set dst (LShiftVS src shift)); 16021 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 16022 ins_encode %{ 16023 int vector_len = 1; 16024 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16025 %} 16026 ins_pipe( pipe_slow ); 16027 %} 16028 16029 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 16030 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 16031 match(Set dst (LShiftVS src shift)); 16032 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 16033 ins_encode %{ 16034 int vector_len = 1; 16035 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16036 %} 16037 ins_pipe( pipe_slow ); 16038 %} 16039 16040 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 16041 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 16042 match(Set dst (LShiftVS dst shift)); 16043 effect(TEMP src); 16044 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 16045 ins_encode %{ 16046 int vector_len = 1; 16047 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16048 %} 16049 ins_pipe( pipe_slow ); 16050 %} 16051 16052 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 16053 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 16054 match(Set dst (LShiftVS src shift)); 16055 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed32S" %} 16056 ins_encode %{ 16057 int vector_len = 2; 16058 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16059 %} 16060 ins_pipe( pipe_slow ); 16061 %} 16062 16063 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 16064 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 16065 match(Set dst (LShiftVS src shift)); 16066 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 16067 ins_encode %{ 16068 int vector_len = 2; 16069 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16070 %} 16071 ins_pipe( pipe_slow ); 16072 %} 16073 16074 // Integers vector left shift 16075 instruct vsll2I(vecD dst, vecS shift) %{ 16076 predicate(n->as_Vector()->length() == 2); 16077 match(Set dst (LShiftVI dst shift)); 16078 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 16079 ins_encode %{ 16080 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 16081 %} 16082 ins_pipe( pipe_slow ); 16083 %} 16084 16085 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 16086 predicate(n->as_Vector()->length() == 2); 16087 match(Set dst (LShiftVI dst shift)); 16088 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 16089 ins_encode %{ 16090 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 16091 %} 16092 ins_pipe( pipe_slow ); 16093 %} 16094 16095 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 16096 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 16097 match(Set dst (LShiftVI src shift)); 16098 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 16099 ins_encode %{ 16100 int vector_len = 0; 16101 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16102 %} 16103 ins_pipe( pipe_slow ); 16104 %} 16105 16106 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 16107 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 16108 match(Set dst (LShiftVI src shift)); 16109 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 16110 ins_encode %{ 16111 int vector_len = 0; 16112 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16113 %} 16114 ins_pipe( pipe_slow ); 16115 %} 16116 16117 instruct vsll4I(vecX dst, vecS shift) %{ 16118 predicate(n->as_Vector()->length() == 4); 16119 match(Set dst (LShiftVI dst shift)); 16120 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 16121 ins_encode %{ 16122 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 16123 %} 16124 ins_pipe( pipe_slow ); 16125 %} 16126 16127 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 16128 predicate(n->as_Vector()->length() == 4); 16129 match(Set dst (LShiftVI dst shift)); 16130 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 16131 ins_encode %{ 16132 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 16133 %} 16134 ins_pipe( pipe_slow ); 16135 %} 16136 16137 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 16138 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 16139 match(Set dst (LShiftVI src shift)); 16140 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 16141 ins_encode %{ 16142 int vector_len = 0; 16143 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16144 %} 16145 ins_pipe( pipe_slow ); 16146 %} 16147 16148 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 16149 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 16150 match(Set dst (LShiftVI src shift)); 16151 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 16152 ins_encode %{ 16153 int vector_len = 0; 16154 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16155 %} 16156 ins_pipe( pipe_slow ); 16157 %} 16158 16159 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 16160 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 16161 match(Set dst (LShiftVI src shift)); 16162 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 16163 ins_encode %{ 16164 int vector_len = 1; 16165 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16166 %} 16167 ins_pipe( pipe_slow ); 16168 %} 16169 16170 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 16171 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 16172 match(Set dst (LShiftVI src shift)); 16173 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 16174 ins_encode %{ 16175 int vector_len = 1; 16176 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16177 %} 16178 ins_pipe( pipe_slow ); 16179 %} 16180 16181 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 16182 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 16183 match(Set dst (LShiftVI src shift)); 16184 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 16185 ins_encode %{ 16186 int vector_len = 2; 16187 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16188 %} 16189 ins_pipe( pipe_slow ); 16190 %} 16191 16192 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 16193 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 16194 match(Set dst (LShiftVI src shift)); 16195 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed16I" %} 16196 ins_encode %{ 16197 int vector_len = 2; 16198 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16199 %} 16200 ins_pipe( pipe_slow ); 16201 %} 16202 16203 // Longs vector left shift 16204 instruct vsll2L(vecX dst, vecS shift) %{ 16205 predicate(n->as_Vector()->length() == 2); 16206 match(Set dst (LShiftVL dst shift)); 16207 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 16208 ins_encode %{ 16209 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 16210 %} 16211 ins_pipe( pipe_slow ); 16212 %} 16213 16214 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 16215 predicate(n->as_Vector()->length() == 2); 16216 match(Set dst (LShiftVL dst shift)); 16217 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 16218 ins_encode %{ 16219 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 16220 %} 16221 ins_pipe( pipe_slow ); 16222 %} 16223 16224 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 16225 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 16226 match(Set dst (LShiftVL src shift)); 16227 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 16228 ins_encode %{ 16229 int vector_len = 0; 16230 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16231 %} 16232 ins_pipe( pipe_slow ); 16233 %} 16234 16235 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 16236 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 16237 match(Set dst (LShiftVL src shift)); 16238 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 16239 ins_encode %{ 16240 int vector_len = 0; 16241 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16242 %} 16243 ins_pipe( pipe_slow ); 16244 %} 16245 16246 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 16247 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 16248 match(Set dst (LShiftVL src shift)); 16249 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed4L" %} 16250 ins_encode %{ 16251 int vector_len = 1; 16252 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16253 %} 16254 ins_pipe( pipe_slow ); 16255 %} 16256 16257 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 16258 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 16259 match(Set dst (LShiftVL src shift)); 16260 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 16261 ins_encode %{ 16262 int vector_len = 1; 16263 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16264 %} 16265 ins_pipe( pipe_slow ); 16266 %} 16267 16268 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 16269 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 16270 match(Set dst (LShiftVL src shift)); 16271 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 16272 ins_encode %{ 16273 int vector_len = 2; 16274 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16275 %} 16276 ins_pipe( pipe_slow ); 16277 %} 16278 16279 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 16280 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 16281 match(Set dst (LShiftVL src shift)); 16282 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 16283 ins_encode %{ 16284 int vector_len = 2; 16285 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16286 %} 16287 ins_pipe( pipe_slow ); 16288 %} 16289 16290 // ----------------------- LogicalRightShift ----------------------------------- 16291 16292 // Shorts vector logical right shift produces incorrect Java result 16293 // for negative data because java code convert short value into int with 16294 // sign extension before a shift. But char vectors are fine since chars are 16295 // unsigned values. 

instruct vsrl2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 16565 ins_encode %{ 16566 int vector_len = 1; 16567 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16568 %} 16569 ins_pipe( pipe_slow ); 16570 %} 16571 16572 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 16573 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 16574 match(Set dst (URShiftVS src shift)); 16575 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 16576 ins_encode %{ 16577 int vector_len = 1; 16578 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16579 %} 16580 ins_pipe( pipe_slow ); 16581 %} 16582 16583 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 16584 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 16585 match(Set dst (URShiftVS dst shift)); 16586 effect(TEMP src); 16587 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 16588 ins_encode %{ 16589 int vector_len = 1; 16590 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16591 %} 16592 ins_pipe( pipe_slow ); 16593 %} 16594 16595 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 16596 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 16597 match(Set dst (URShiftVS src shift)); 16598 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 16599 ins_encode %{ 16600 int vector_len = 1; 16601 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16602 %} 16603 ins_pipe( pipe_slow ); 16604 %} 16605 16606 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 16607 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 16608 match(Set dst (URShiftVS src shift)); 16609 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 16610 ins_encode %{ 16611 int vector_len = 1; 16612 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16613 %} 16614 ins_pipe( pipe_slow ); 16615 %} 16616 16617 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 16618 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 16619 match(Set dst (URShiftVS dst shift)); 16620 effect(TEMP src); 16621 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 16622 ins_encode %{ 16623 int vector_len = 1; 16624 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16625 %} 16626 ins_pipe( pipe_slow ); 16627 %} 16628 16629 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 16630 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 16631 match(Set dst (URShiftVS src shift)); 16632 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 16633 ins_encode %{ 16634 int vector_len = 2; 16635 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16636 %} 16637 ins_pipe( pipe_slow ); 16638 %} 16639 16640 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 16641 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 16642 match(Set dst (URShiftVS src shift)); 16643 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 16644 ins_encode %{ 16645 int vector_len = 2; 16646 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16647 %} 16648 ins_pipe( pipe_slow ); 16649 %} 16650 16651 // Integers vector logical right shift 16652 instruct vsrl2I(vecD dst, vecS shift) %{ 16653 predicate(n->as_Vector()->length() == 2); 16654 match(Set dst (URShiftVI dst shift)); 16655 format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Int and long shifts use psrld/psrlq, which need no BW-style special-casing:
// only SSE in-place forms (no UseAVX guard; the AVX forms match the distinct
// (src shift) shape) plus 3-operand forms for UseAVX > 0 / > 1 / > 2.
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t!
logical right shift packed4L" %} 16838 ins_encode %{ 16839 int vector_len = 1; 16840 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16841 %} 16842 ins_pipe( pipe_slow ); 16843 %} 16844 16845 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 16846 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 16847 match(Set dst (URShiftVL src shift)); 16848 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 16849 ins_encode %{ 16850 int vector_len = 2; 16851 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16852 %} 16853 ins_pipe( pipe_slow ); 16854 %} 16855 16856 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 16857 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 16858 match(Set dst (URShiftVL src shift)); 16859 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 16860 ins_encode %{ 16861 int vector_len = 2; 16862 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16863 %} 16864 ins_pipe( pipe_slow ); 16865 %} 16866 16867 // ------------------- ArithmeticRightShift ----------------------------------- 16868 16869 // Shorts/Chars vector arithmetic right shift 16870 instruct vsra2S(vecS dst, vecS shift) %{ 16871 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 16872 match(Set dst (RShiftVS dst shift)); 16873 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 16874 ins_encode %{ 16875 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 16876 %} 16877 ins_pipe( pipe_slow ); 16878 %} 16879 16880 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 16881 predicate(n->as_Vector()->length() == 2); 16882 match(Set dst (RShiftVS dst shift)); 16883 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Arithmetic right shift of shorts: same seven-variant scheme as the logical
// shifts above (SSE in-place, VEX 3-operand, EVEX/AVX512BW, and avx512nobw
// "special" forms that match the in-place shape with src as TEMP).
instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit (16S) forms: _avx variants require AVX2 (supports_avx256only()).
instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t!
arithmetic right shift packed16S" %} 17149 ins_encode %{ 17150 int vector_len = 1; 17151 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17152 %} 17153 ins_pipe( pipe_slow ); 17154 %} 17155 17156 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 17157 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 17158 match(Set dst (RShiftVS dst shift)); 17159 effect(TEMP src); 17160 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 17161 ins_encode %{ 17162 int vector_len = 1; 17163 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17164 %} 17165 ins_pipe( pipe_slow ); 17166 %} 17167 17168 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 17169 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 17170 match(Set dst (RShiftVS src shift)); 17171 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 17172 ins_encode %{ 17173 int vector_len = 1; 17174 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17175 %} 17176 ins_pipe( pipe_slow ); 17177 %} 17178 17179 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 17180 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 17181 match(Set dst (RShiftVS src shift)); 17182 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 17183 ins_encode %{ 17184 int vector_len = 1; 17185 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17186 %} 17187 ins_pipe( pipe_slow ); 17188 %} 17189 17190 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 17191 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 17192 match(Set dst (RShiftVS dst shift)); 17193 effect(TEMP src); 17194 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit (32S) arithmetic shifts exist only with AVX512BW (vector_len = 2).
instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t!
arithmetic right shift packed16I" %} 17335 ins_encode %{ 17336 int vector_len = 2; 17337 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17338 %} 17339 ins_pipe( pipe_slow ); 17340 %} 17341 17342 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 17343 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 17344 match(Set dst (RShiftVI src shift)); 17345 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 17346 ins_encode %{ 17347 int vector_len = 2; 17348 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17349 %} 17350 ins_pipe( pipe_slow ); 17351 %} 17352 17353 // Long vector arithmetic right shift 17354 instruct vsra1L(vecD dst, vecD src, vecS shift, vecD tmp) %{ 17355 predicate(n->as_Vector()->length() == 1); 17356 match(Set dst (RShiftVL src shift)); 17357 effect(TEMP dst, TEMP tmp); 17358 format %{ "movdqu $dst,$src\n\t" 17359 "psrlq $dst,$shift\n\t" 17360 "movdqu $tmp,[0x8000000000000000]\n\t" 17361 "psrlq $tmp,$shift\n\t" 17362 "pxor $dst,$tmp\n\t" 17363 "psubq $dst,$tmp\t! arithmetic right shift packed1L" %} 17364 ins_encode %{ 17365 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 17366 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 17367 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17368 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 17369 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 17370 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 17371 %} 17372 ins_pipe( pipe_slow ); 17373 %} 17374 17375 instruct vsra1L_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{ 17376 predicate(n->as_Vector()->length() == 1); 17377 match(Set dst (RShiftVL src shift)); 17378 effect(TEMP dst, TEMP tmp); 17379 format %{ "movdqu $dst,$src\n\t" 17380 "psrlq $dst,$shift\n\t" 17381 "movdqu $tmp,[0x8000000000000000]\n\t" 17382 "psrlq $tmp,$shift\n\t" 17383 "pxor $dst,$tmp\n\t" 17384 "psubq $dst,$tmp\t! 
arithmetic right shift packed1L" %} 17385 ins_encode %{ 17386 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 17387 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 17388 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17389 __ psrlq($tmp$$XMMRegister, (int)$shift$$constant); 17390 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 17391 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 17392 %} 17393 ins_pipe( pipe_slow ); 17394 %} 17395 17396 instruct vsra1L_reg(vecD dst, vecD src, vecS shift, vecD tmp) %{ 17397 predicate(UseAVX > 0 && n->as_Vector()->length() == 1); 17398 match(Set dst (RShiftVL src shift)); 17399 effect(TEMP dst, TEMP tmp); 17400 format %{ "vpsrlq $dst,$src,$shift\n\t" 17401 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17402 "vpsrlq $tmp,$tmp,$shift\n\t" 17403 "vpxor $dst,$dst,$tmp\n\t" 17404 "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed1L" %} 17405 ins_encode %{ 17406 int vector_len = 0; 17407 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17408 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17409 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17410 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17411 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17412 %} 17413 ins_pipe( pipe_slow ); 17414 %} 17415 17416 instruct vsra1L_reg_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{ 17417 predicate(UseAVX > 0 && n->as_Vector()->length() == 1); 17418 match(Set dst (RShiftVL src shift)); 17419 effect(TEMP dst, TEMP tmp); 17420 format %{ "vpsrlq $dst,$src,$shift\n\t" 17421 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17422 "vpsrlq $tmp,$tmp,$shift\n\t" 17423 "vpxor $dst,$dst,$tmp\n\t" 17424 "vpsubq $dst,$dst,$tmp\t! 
arithmetic right shift packed1L" %} 17425 ins_encode %{ 17426 int vector_len = 0; 17427 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17428 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17429 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len); 17430 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17431 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17432 %} 17433 ins_pipe( pipe_slow ); 17434 %} 17435 17436 instruct vsra1L_reg_evex(vecD dst, vecD src, vecS shift) %{ 17437 predicate(UseAVX > 2 && n->as_Vector()->length() == 1); 17438 match(Set dst (RShiftVL src shift)); 17439 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed1L" %} 17440 ins_encode %{ 17441 int vector_len = 0; 17442 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17443 %} 17444 ins_pipe( pipe_slow ); 17445 %} 17446 17447 instruct vsra2L_reg_imm(vecX dst, vecX src, immI8 shift, vecX tmp) %{ 17448 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 17449 match(Set dst (RShiftVL src shift)); 17450 effect(TEMP dst, TEMP tmp); 17451 format %{ "vpsrlq $dst,$src,$shift\n\t" 17452 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17453 "vpsrlq $tmp,$tmp,$shift\n\t" 17454 "vpxor $dst,$dst,$tmp\n\t" 17455 "vpsubq $dst,$dst,$tmp\t! 
arithmetic right shift packed2L" %} 17456 ins_encode %{ 17457 int vector_len = 0; 17458 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17459 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17460 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len); 17461 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17462 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17463 %} 17464 ins_pipe( pipe_slow ); 17465 %} 17466 17467 instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp) %{ 17468 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 17469 match(Set dst (RShiftVL src shift)); 17470 effect(TEMP dst, TEMP tmp); 17471 format %{ "vpsrlq $dst,$src,$shift\n\t" 17472 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17473 "vpsrlq $tmp,$tmp,$shift\n\t" 17474 "vpxor $dst,$dst,$tmp\n\t" 17475 "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed2L" %} 17476 ins_encode %{ 17477 int vector_len = 0; 17478 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17479 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17480 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17481 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17482 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17483 %} 17484 ins_pipe( pipe_slow ); 17485 %} 17486 17487 instruct vsra2L_reg_evex_imm(vecX dst, vecX src, immI8 shift) %{ 17488 predicate(UseAVX > 2 && n->as_Vector()->length() == 2); 17489 match(Set dst (RShiftVL src shift)); 17490 format %{ "evpsraq $dst,$src,$shift\t! 
arithmetic right shift packed2L" %} 17491 ins_encode %{ 17492 int vector_len = 0; 17493 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17494 %} 17495 ins_pipe( pipe_slow ); 17496 %} 17497 17498 instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{ 17499 predicate(UseAVX > 2 && n->as_Vector()->length() == 2); 17500 match(Set dst (RShiftVL src shift)); 17501 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %} 17502 ins_encode %{ 17503 int vector_len = 0; 17504 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17505 %} 17506 ins_pipe( pipe_slow ); 17507 %} 17508 17509 instruct vsra4L_reg_imm(vecY dst, vecY src, immI8 shift, vecY tmp) %{ 17510 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 17511 match(Set dst (RShiftVL src shift)); 17512 effect(TEMP dst, TEMP tmp); 17513 format %{ "vpsrlq $dst,$src,$shift\n\t" 17514 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17515 "vpsrlq $tmp,$tmp,$shift\n\t" 17516 "vpxor $dst,$dst,$tmp\n\t" 17517 "vpsubq $dst,$dst,$tmp\t! 
arithmetic right shift packed4L" %} 17518 ins_encode %{ 17519 int vector_len = 1; 17520 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17521 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17522 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len); 17523 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17524 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17525 %} 17526 ins_pipe( pipe_slow ); 17527 %} 17528 17529 instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp) %{ 17530 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 17531 match(Set dst (RShiftVL src shift)); 17532 effect(TEMP dst, TEMP tmp); 17533 format %{ "vpsrlq $dst,$src,$shift\n\t" 17534 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17535 "vpsrlq $tmp,$tmp,$shift\n\t" 17536 "vpxor $dst,$dst,$tmp\n\t" 17537 "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %} 17538 ins_encode %{ 17539 int vector_len = 1; 17540 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17541 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17542 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17543 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17544 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17545 %} 17546 ins_pipe( pipe_slow ); 17547 %} 17548 17549 instruct vsra4L_reg_evex_imm(vecY dst, vecY src, immI8 shift) %{ 17550 predicate(UseAVX > 2 && n->as_Vector()->length() == 4); 17551 match(Set dst (RShiftVL src shift)); 17552 format %{ "evpsraq $dst,$src,$shift\t! 
arithmetic right shift packed2L" %} 17553 ins_encode %{ 17554 int vector_len = 1; 17555 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17556 %} 17557 ins_pipe( pipe_slow ); 17558 %} 17559 17560 instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{ 17561 predicate(UseAVX > 2 && n->as_Vector()->length() == 4); 17562 match(Set dst (RShiftVL src shift)); 17563 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %} 17564 ins_encode %{ 17565 int vector_len = 1; 17566 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17567 %} 17568 ins_pipe( pipe_slow ); 17569 %} 17570 17571 instruct vsra8L_reg_evex_imm(vecZ dst, vecZ src, immI8 shift) %{ 17572 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 17573 match(Set dst (RShiftVL src shift)); 17574 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %} 17575 ins_encode %{ 17576 int vector_len = 2; 17577 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17578 %} 17579 ins_pipe( pipe_slow ); 17580 %} 17581 17582 instruct vsra8L_reg_evex(vecZ dst, vecZ src, vecS shift) %{ 17583 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 17584 match(Set dst (RShiftVL src shift)); 17585 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed8L" %} 17586 ins_encode %{ 17587 int vector_len = 2; 17588 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17589 %} 17590 ins_pipe( pipe_slow ); 17591 %} 17592 17593 // ------------------- Variable Bit Shift Left Logical ----------------------------- 17594 //Integer Variable left shift 17595 instruct vsllv2I(vecD dst, vecD src, vecD shift) %{ 17596 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); 17597 match(Set dst (LShiftVI src shift)); 17598 format %{ "vpsllvd $dst,$src,$shift\t! 
variable bit shift left shift packed2I" %} 17599 ins_encode %{ 17600 int vector_len = 0; 17601 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17602 %} 17603 ins_pipe( pipe_slow ); 17604 %} 17605 17606 instruct vsllv4I_reg(vecX dst, vecX src, vecX shift) %{ 17607 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); 17608 match(Set dst (LShiftVI src shift)); 17609 format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed4I" %} 17610 ins_encode %{ 17611 int vector_len = 0; 17612 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17613 %} 17614 ins_pipe( pipe_slow ); 17615 %} 17616 17617 instruct vsllv4I_reg_evex(vecX dst, vecX src, vecX shift) %{ 17618 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); 17619 match(Set dst (LShiftVI src shift)); 17620 format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed4I" %} 17621 ins_encode %{ 17622 int vector_len = 0; 17623 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17624 %} 17625 ins_pipe( pipe_slow ); 17626 %} 17627 17628 instruct vsllv8I_reg(vecY dst, vecY src, vecY shift) %{ 17629 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); 17630 match(Set dst (LShiftVI src shift)); 17631 format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed8I" %} 17632 ins_encode %{ 17633 int vector_len = 1; 17634 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17635 %} 17636 ins_pipe( pipe_slow ); 17637 %} 17638 17639 instruct vsllv8I_reg_evex(vecY dst, vecY src, vecY shift) %{ 17640 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); 17641 match(Set dst (LShiftVI src shift)); 17642 format %{ "vpsllvd $dst,$src,$shift\t! 
variable bit shift left shift packed8I" %} 17643 ins_encode %{ 17644 int vector_len = 1; 17645 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17646 %} 17647 ins_pipe( pipe_slow ); 17648 %} 17649 17650 instruct vsllv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 17651 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_LShiftCntV); 17652 match(Set dst (LShiftVI src shift)); 17653 format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed16I" %} 17654 ins_encode %{ 17655 int vector_len = 2; 17656 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17657 %} 17658 ins_pipe( pipe_slow ); 17659 %} 17660 17661 //Long Variable left shift 17662 instruct vsllv1L_reg(vecD dst, vecD src, vecD shift) %{ 17663 predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_LShiftCntV); 17664 match(Set dst (LShiftVL src shift)); 17665 format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed1L" %} 17666 ins_encode %{ 17667 int vector_len = 0; 17668 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17669 %} 17670 ins_pipe( pipe_slow ); 17671 %} 17672 17673 instruct vsllv2L_reg(vecX dst, vecX src, vecX shift) %{ 17674 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); 17675 match(Set dst (LShiftVL src shift)); 17676 format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed2L" %} 17677 ins_encode %{ 17678 int vector_len = 0; 17679 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17680 %} 17681 ins_pipe( pipe_slow ); 17682 %} 17683 17684 instruct vsllv2L_reg_evex(vecX dst, vecX src, vecX shift) %{ 17685 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); 17686 match(Set dst (LShiftVL src shift)); 17687 format %{ "vpsllvq $dst,$src,$shift\t! 
variable bit shift left shift packed2L" %} 17688 ins_encode %{ 17689 int vector_len = 0; 17690 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17691 %} 17692 ins_pipe( pipe_slow ); 17693 %} 17694 17695 instruct vsllv4L_reg(vecY dst, vecY src, vecY shift) %{ 17696 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); 17697 match(Set dst (LShiftVL src shift)); 17698 format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed4L" %} 17699 ins_encode %{ 17700 int vector_len = 1; 17701 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17702 %} 17703 ins_pipe( pipe_slow ); 17704 %} 17705 17706 instruct vsllv4L_reg_evex(vecY dst, vecY src, vecY shift) %{ 17707 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); 17708 match(Set dst (LShiftVL src shift)); 17709 format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed4L" %} 17710 ins_encode %{ 17711 int vector_len = 1; 17712 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17713 %} 17714 ins_pipe( pipe_slow ); 17715 %} 17716 17717 instruct vsllv8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 17718 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); 17719 match(Set dst (LShiftVL src shift)); 17720 format %{ "vpsllvq $dst,$src,$shift\t! 
variable bit shift left shift packed16I" %} 17721 ins_encode %{ 17722 int vector_len = 2; 17723 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17724 %} 17725 ins_pipe( pipe_slow ); 17726 %} 17727 17728 // ------------------- Variable Bit Shift Right Logical ----------------------------- 17729 //Integer Variable right shift 17730 instruct vsrlv2I_reg(vecD dst, vecD src, vecD shift) %{ 17731 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17732 match(Set dst (URShiftVI src shift)); 17733 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed2I" %} 17734 ins_encode %{ 17735 int vector_len = 0; 17736 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17737 %} 17738 ins_pipe( pipe_slow ); 17739 %} 17740 17741 instruct vsrlv4I_reg(vecX dst, vecX src, vecX shift) %{ 17742 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17743 match(Set dst (URShiftVI src shift)); 17744 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed4I" %} 17745 ins_encode %{ 17746 int vector_len = 0; 17747 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17748 %} 17749 ins_pipe( pipe_slow ); 17750 %} 17751 17752 instruct vsrlv4I_reg_evex(vecX dst, vecX src, vecX shift) %{ 17753 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17754 match(Set dst (URShiftVI src shift)); 17755 format %{ "vpsrlvd $dst,$src,$shift\t! 
variable bit shift right shift packed4I" %} 17756 ins_encode %{ 17757 int vector_len = 0; 17758 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17759 %} 17760 ins_pipe( pipe_slow ); 17761 %} 17762 17763 instruct vsrlv8I_reg(vecY dst, vecY src, vecY shift) %{ 17764 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17765 match(Set dst (URShiftVI src shift)); 17766 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed8I" %} 17767 ins_encode %{ 17768 int vector_len = 1; 17769 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17770 %} 17771 ins_pipe( pipe_slow ); 17772 %} 17773 17774 instruct vsrlv8I_reg_evex(vecY dst, vecY src, vecY shift) %{ 17775 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17776 match(Set dst (URShiftVI src shift)); 17777 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed8I" %} 17778 ins_encode %{ 17779 int vector_len = 1; 17780 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17781 %} 17782 ins_pipe( pipe_slow ); 17783 %} 17784 17785 instruct vsrlv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 17786 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV); 17787 match(Set dst (URShiftVI src shift)); 17788 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed16I" %} 17789 ins_encode %{ 17790 int vector_len = 2; 17791 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17792 %} 17793 ins_pipe( pipe_slow ); 17794 %} 17795 17796 //Long Variable right shift 17797 instruct vsrlv1L_reg(vecD dst, vecD src, vecD shift) %{ 17798 predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); 17799 match(Set dst (URShiftVL src shift)); 17800 format %{ "vpsrlvq $dst,$src,$shift\t! 
variable bit shift right shift packed1L" %} 17801 ins_encode %{ 17802 int vector_len = 0; 17803 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17804 %} 17805 ins_pipe( pipe_slow ); 17806 %} 17807 17808 instruct vsrlv2L_reg(vecX dst, vecX src, vecX shift) %{ 17809 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17810 match(Set dst (URShiftVL src shift)); 17811 format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed2L" %} 17812 ins_encode %{ 17813 int vector_len = 0; 17814 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17815 %} 17816 ins_pipe( pipe_slow ); 17817 %} 17818 17819 instruct vsrlv2L_reg_evex(vecX dst, vecX src, vecX shift) %{ 17820 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17821 match(Set dst (URShiftVL src shift)); 17822 format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed2L" %} 17823 ins_encode %{ 17824 int vector_len = 0; 17825 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17826 %} 17827 ins_pipe( pipe_slow ); 17828 %} 17829 17830 instruct vsrlv4L_reg(vecY dst, vecY src, vecY shift) %{ 17831 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17832 match(Set dst (URShiftVL src shift)); 17833 format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed4L" %} 17834 ins_encode %{ 17835 int vector_len = 1; 17836 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17837 %} 17838 ins_pipe( pipe_slow ); 17839 %} 17840 17841 instruct vsrlv4L_reg_evex(vecY dst, vecY src, vecY shift) %{ 17842 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17843 match(Set dst (URShiftVL src shift)); 17844 format %{ "vpsrlvq $dst,$src,$shift\t! 
variable bit shift right shift packed4L" %} 17845 ins_encode %{ 17846 int vector_len = 1; 17847 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17848 %} 17849 ins_pipe( pipe_slow ); 17850 %} 17851 17852 instruct vsrlv8L_reg(vecZ dst, vecZ src, vecZ shift) %{ 17853 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17854 match(Set dst (URShiftVL src shift)); 17855 format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed8L" %} 17856 ins_encode %{ 17857 int vector_len = 2; 17858 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17859 %} 17860 ins_pipe( pipe_slow ); 17861 %} 17862 17863 // ------------------- Variable Bit Shift Right Arithmetic ----------------------------- 17864 //Integer Variable right shift 17865 instruct vsrav2I_reg(vecD dst, vecD src, vecD shift) %{ 17866 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17867 match(Set dst (RShiftVI src shift)); 17868 format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed2I" %} 17869 ins_encode %{ 17870 int vector_len = 0; 17871 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17872 %} 17873 ins_pipe( pipe_slow ); 17874 %} 17875 17876 instruct vsrav4I_reg(vecX dst, vecX src, vecX shift) %{ 17877 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17878 match(Set dst (RShiftVI src shift)); 17879 format %{ "vpsravd $dst,$src,$shift\t! 
variable bit shift right shift packed4I" %} 17880 ins_encode %{ 17881 int vector_len = 0; 17882 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17883 %} 17884 ins_pipe( pipe_slow ); 17885 %} 17886 17887 instruct vsrav4I_reg_evex(vecX dst, vecX src, vecX shift) %{ 17888 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17889 match(Set dst (RShiftVI src shift)); 17890 format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed4I" %} 17891 ins_encode %{ 17892 int vector_len = 0; 17893 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17894 %} 17895 ins_pipe( pipe_slow ); 17896 %} 17897 17898 instruct vsrav8I_reg(vecY dst, vecY src, vecY shift) %{ 17899 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17900 match(Set dst (RShiftVI src shift)); 17901 format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed8I" %} 17902 ins_encode %{ 17903 int vector_len = 1; 17904 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17905 %} 17906 ins_pipe( pipe_slow ); 17907 %} 17908 17909 instruct vsrav8I_reg_evex(vecY dst, vecY src, vecY shift) %{ 17910 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17911 match(Set dst (RShiftVI src shift)); 17912 format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed8I" %} 17913 ins_encode %{ 17914 int vector_len = 1; 17915 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17916 %} 17917 ins_pipe( pipe_slow ); 17918 %} 17919 17920 instruct vsrav16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 17921 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV); 17922 match(Set dst (RShiftVI src shift)); 17923 format %{ "vpsravd $dst,$src,$shift\t! 
variable bit shift right shift packed16I" %} 17924 ins_encode %{ 17925 int vector_len = 2; 17926 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17927 %} 17928 ins_pipe( pipe_slow ); 17929 %} 17930 17931 //Long Variable right shift arithmetic 17932 instruct vsrav1L_reg(vecD dst, vecD src, vecD shift, vecD tmp) %{ 17933 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); 17934 match(Set dst (RShiftVL src shift)); 17935 effect(TEMP dst, TEMP tmp); 17936 format %{ "vpsrlvq $dst,$src,$shift\n\t" 17937 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17938 "vpsrlvq $tmp,$tmp,$shift\n\t" 17939 "vpxor $dst,$dst,$tmp\n\t" 17940 "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed1L" %} 17941 ins_encode %{ 17942 int vector_len = 0; 17943 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17944 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17945 __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17946 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17947 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17948 %} 17949 ins_pipe( pipe_slow ); 17950 %} 17951 17952 instruct vsrav1L_reg_evex(vecD dst, vecD src, vecD shift) %{ 17953 predicate(UseAVX > 2 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); 17954 match(Set dst (RShiftVL src shift)); 17955 format %{ "evpsravq $dst,$src,$shift\t! 
variable arithmetic right shift packed1L" %} 17956 ins_encode %{ 17957 int vector_len = 0; 17958 __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17959 %} 17960 ins_pipe( pipe_slow ); 17961 %} 17962 17963 instruct vsrav2L_reg(vecX dst, vecX src, vecX shift, vecX tmp) %{ 17964 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17965 match(Set dst (RShiftVL src shift)); 17966 effect(TEMP dst, TEMP tmp); 17967 format %{ "vpsrlvq $dst,$src,$shift\n\t" 17968 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17969 "vpsrlvq $tmp,$tmp,$shift\n\t" 17970 "vpxor $dst,$dst,$tmp\n\t" 17971 "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed2L" %} 17972 ins_encode %{ 17973 int vector_len = 0; 17974 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17975 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17976 __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17977 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17978 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17979 %} 17980 ins_pipe( pipe_slow ); 17981 %} 17982 17983 instruct vsrav2L_reg_evex(vecX dst, vecX src, vecX shift) %{ 17984 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17985 match(Set dst (RShiftVL src shift)); 17986 format %{ "evpsravq $dst,$src,$shift\t! 
variable arithmetic right shift packed2L" %} 17987 ins_encode %{ 17988 int vector_len = 0; 17989 __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17990 %} 17991 ins_pipe( pipe_slow ); 17992 %} 17993 17994 instruct vsrav4L_reg(vecY dst, vecY src, vecY shift, vecY tmp) %{ 17995 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17996 match(Set dst (RShiftVL src shift)); 17997 effect(TEMP dst, TEMP tmp); 17998 format %{ "vpsrlvq $dst,$src,$shift\n\t" 17999 "vmovdqu $tmp,[0x8000000000000000]\n\t" 18000 "vpsrlvq $tmp,$tmp,$shift\n\t" 18001 "vpxor $dst,$dst,$tmp\n\t" 18002 "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed4L" %} 18003 ins_encode %{ 18004 int vector_len = 1; 18005 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 18006 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 18007 __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 18008 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18009 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18010 %} 18011 ins_pipe( pipe_slow ); 18012 %} 18013 18014 instruct vsrav4L_reg_evex(vecY dst, vecY src, vecY shift) %{ 18015 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 18016 match(Set dst (RShiftVL src shift)); 18017 format %{ "evpsravq $dst,$src,$shift\t! variable bit shift right shift packed4L" %} 18018 ins_encode %{ 18019 int vector_len = 1; 18020 __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 18021 %} 18022 ins_pipe( pipe_slow ); 18023 %} 18024 18025 instruct vsrav8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 18026 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 18027 match(Set dst (RShiftVL src shift)); 18028 format %{ "evpsravq $dst,$src,$shift\t! 
variable bit shift right shift packed8L" %} 18029 ins_encode %{ 18030 int vector_len = 2; 18031 __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 18032 %} 18033 ins_pipe( pipe_slow ); 18034 %} 18035 18036 // --------------------------------- AND -------------------------------------- 18037 18038 instruct vand4B(vecS dst, vecS src) %{ 18039 predicate(n->as_Vector()->length_in_bytes() == 4); 18040 match(Set dst (AndV dst src)); 18041 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 18042 ins_encode %{ 18043 __ pand($dst$$XMMRegister, $src$$XMMRegister); 18044 %} 18045 ins_pipe( pipe_slow ); 18046 %} 18047 18048 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 18049 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18050 match(Set dst (AndV src1 src2)); 18051 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 18052 ins_encode %{ 18053 int vector_len = 0; 18054 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18055 %} 18056 ins_pipe( pipe_slow ); 18057 %} 18058 18059 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 18060 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18061 match(Set dst (AndV src (LoadVector mem))); 18062 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 18063 ins_encode %{ 18064 int vector_len = 0; 18065 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18066 %} 18067 ins_pipe( pipe_slow ); 18068 %} 18069 18070 instruct vand8B(vecD dst, vecD src) %{ 18071 predicate(n->as_Vector()->length_in_bytes() == 8); 18072 match(Set dst (AndV dst src)); 18073 format %{ "pand $dst,$src\t! 
and vectors (8 bytes)" %} 18074 ins_encode %{ 18075 __ pand($dst$$XMMRegister, $src$$XMMRegister); 18076 %} 18077 ins_pipe( pipe_slow ); 18078 %} 18079 18080 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 18081 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18082 match(Set dst (AndV src1 src2)); 18083 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 18084 ins_encode %{ 18085 int vector_len = 0; 18086 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18087 %} 18088 ins_pipe( pipe_slow ); 18089 %} 18090 18091 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 18092 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18093 match(Set dst (AndV src (LoadVector mem))); 18094 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 18095 ins_encode %{ 18096 int vector_len = 0; 18097 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18098 %} 18099 ins_pipe( pipe_slow ); 18100 %} 18101 18102 instruct vand16B(vecX dst, vecX src) %{ 18103 predicate(n->as_Vector()->length_in_bytes() == 16); 18104 match(Set dst (AndV dst src)); 18105 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 18106 ins_encode %{ 18107 __ pand($dst$$XMMRegister, $src$$XMMRegister); 18108 %} 18109 ins_pipe( pipe_slow ); 18110 %} 18111 18112 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 18113 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18114 match(Set dst (AndV src1 src2)); 18115 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 18116 ins_encode %{ 18117 int vector_len = 0; 18118 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18119 %} 18120 ins_pipe( pipe_slow ); 18121 %} 18122 18123 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 18124 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18125 match(Set dst (AndV src (LoadVector mem))); 18126 format %{ "vpand $dst,$src,$mem\t! 
and vectors (16 bytes)" %} 18127 ins_encode %{ 18128 int vector_len = 0; 18129 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18130 %} 18131 ins_pipe( pipe_slow ); 18132 %} 18133 18134 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 18135 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18136 match(Set dst (AndV src1 src2)); 18137 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 18138 ins_encode %{ 18139 int vector_len = 1; 18140 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18141 %} 18142 ins_pipe( pipe_slow ); 18143 %} 18144 18145 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 18146 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18147 match(Set dst (AndV src (LoadVector mem))); 18148 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 18149 ins_encode %{ 18150 int vector_len = 1; 18151 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18152 %} 18153 ins_pipe( pipe_slow ); 18154 %} 18155 18156 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 18157 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18158 match(Set dst (AndV src1 src2)); 18159 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} 18160 ins_encode %{ 18161 int vector_len = 2; 18162 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18163 %} 18164 ins_pipe( pipe_slow ); 18165 %} 18166 18167 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ 18168 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18169 match(Set dst (AndV src (LoadVector mem))); 18170 format %{ "vpand $dst,$src,$mem\t! 
and vectors (64 bytes)" %} 18171 ins_encode %{ 18172 int vector_len = 2; 18173 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18174 %} 18175 ins_pipe( pipe_slow ); 18176 %} 18177 18178 // --------------------------------- OR --------------------------------------- 18179 18180 instruct vor4B(vecS dst, vecS src) %{ 18181 predicate(n->as_Vector()->length_in_bytes() == 4); 18182 match(Set dst (OrV dst src)); 18183 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 18184 ins_encode %{ 18185 __ por($dst$$XMMRegister, $src$$XMMRegister); 18186 %} 18187 ins_pipe( pipe_slow ); 18188 %} 18189 18190 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 18191 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18192 match(Set dst (OrV src1 src2)); 18193 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 18194 ins_encode %{ 18195 int vector_len = 0; 18196 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18197 %} 18198 ins_pipe( pipe_slow ); 18199 %} 18200 18201 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 18202 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18203 match(Set dst (OrV src (LoadVector mem))); 18204 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 18205 ins_encode %{ 18206 int vector_len = 0; 18207 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18208 %} 18209 ins_pipe( pipe_slow ); 18210 %} 18211 18212 instruct vor8B(vecD dst, vecD src) %{ 18213 predicate(n->as_Vector()->length_in_bytes() == 8); 18214 match(Set dst (OrV dst src)); 18215 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 18216 ins_encode %{ 18217 __ por($dst$$XMMRegister, $src$$XMMRegister); 18218 %} 18219 ins_pipe( pipe_slow ); 18220 %} 18221 18222 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 18223 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18224 match(Set dst (OrV src1 src2)); 18225 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (8 bytes)" %} 18226 ins_encode %{ 18227 int vector_len = 0; 18228 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18229 %} 18230 ins_pipe( pipe_slow ); 18231 %} 18232 18233 instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ 18234 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18235 match(Set dst (OrV src (LoadVector mem))); 18236 format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} 18237 ins_encode %{ 18238 int vector_len = 0; 18239 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18240 %} 18241 ins_pipe( pipe_slow ); 18242 %} 18243 18244 instruct vor16B(vecX dst, vecX src) %{ 18245 predicate(n->as_Vector()->length_in_bytes() == 16); 18246 match(Set dst (OrV dst src)); 18247 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 18248 ins_encode %{ 18249 __ por($dst$$XMMRegister, $src$$XMMRegister); 18250 %} 18251 ins_pipe( pipe_slow ); 18252 %} 18253 18254 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 18255 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18256 match(Set dst (OrV src1 src2)); 18257 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 18258 ins_encode %{ 18259 int vector_len = 0; 18260 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18261 %} 18262 ins_pipe( pipe_slow ); 18263 %} 18264 18265 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 18266 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18267 match(Set dst (OrV src (LoadVector mem))); 18268 format %{ "vpor $dst,$src,$mem\t! 
or vectors (16 bytes)" %} 18269 ins_encode %{ 18270 int vector_len = 0; 18271 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18272 %} 18273 ins_pipe( pipe_slow ); 18274 %} 18275 18276 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 18277 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18278 match(Set dst (OrV src1 src2)); 18279 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 18280 ins_encode %{ 18281 int vector_len = 1; 18282 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18283 %} 18284 ins_pipe( pipe_slow ); 18285 %} 18286 18287 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 18288 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18289 match(Set dst (OrV src (LoadVector mem))); 18290 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 18291 ins_encode %{ 18292 int vector_len = 1; 18293 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18294 %} 18295 ins_pipe( pipe_slow ); 18296 %} 18297 18298 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 18299 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18300 match(Set dst (OrV src1 src2)); 18301 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 18302 ins_encode %{ 18303 int vector_len = 2; 18304 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18305 %} 18306 ins_pipe( pipe_slow ); 18307 %} 18308 18309 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 18310 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18311 match(Set dst (OrV src (LoadVector mem))); 18312 format %{ "vpor $dst,$src,$mem\t! 
or vectors (64 bytes)" %} 18313 ins_encode %{ 18314 int vector_len = 2; 18315 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18316 %} 18317 ins_pipe( pipe_slow ); 18318 %} 18319 18320 // --------------------------------- XOR -------------------------------------- 18321 18322 instruct vxor4B(vecS dst, vecS src) %{ 18323 predicate(n->as_Vector()->length_in_bytes() == 4); 18324 match(Set dst (XorV dst src)); 18325 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 18326 ins_encode %{ 18327 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 18328 %} 18329 ins_pipe( pipe_slow ); 18330 %} 18331 18332 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 18333 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18334 match(Set dst (XorV src1 src2)); 18335 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 18336 ins_encode %{ 18337 int vector_len = 0; 18338 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18339 %} 18340 ins_pipe( pipe_slow ); 18341 %} 18342 18343 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 18344 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18345 match(Set dst (XorV src (LoadVector mem))); 18346 format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} 18347 ins_encode %{ 18348 int vector_len = 0; 18349 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18350 %} 18351 ins_pipe( pipe_slow ); 18352 %} 18353 18354 instruct vxor8B(vecD dst, vecD src) %{ 18355 predicate(n->as_Vector()->length_in_bytes() == 8); 18356 match(Set dst (XorV dst src)); 18357 format %{ "pxor $dst,$src\t! 
xor vectors (8 bytes)" %} 18358 ins_encode %{ 18359 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 18360 %} 18361 ins_pipe( pipe_slow ); 18362 %} 18363 18364 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 18365 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18366 match(Set dst (XorV src1 src2)); 18367 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 18368 ins_encode %{ 18369 int vector_len = 0; 18370 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18371 %} 18372 ins_pipe( pipe_slow ); 18373 %} 18374 18375 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 18376 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18377 match(Set dst (XorV src (LoadVector mem))); 18378 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 18379 ins_encode %{ 18380 int vector_len = 0; 18381 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18382 %} 18383 ins_pipe( pipe_slow ); 18384 %} 18385 18386 instruct vxor16B(vecX dst, vecX src) %{ 18387 predicate(n->as_Vector()->length_in_bytes() == 16); 18388 match(Set dst (XorV dst src)); 18389 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 18390 ins_encode %{ 18391 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 18392 %} 18393 ins_pipe( pipe_slow ); 18394 %} 18395 18396 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 18397 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18398 match(Set dst (XorV src1 src2)); 18399 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 18400 ins_encode %{ 18401 int vector_len = 0; 18402 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18403 %} 18404 ins_pipe( pipe_slow ); 18405 %} 18406 18407 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 18408 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18409 match(Set dst (XorV src (LoadVector mem))); 18410 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (16 bytes)" %} 18411 ins_encode %{ 18412 int vector_len = 0; 18413 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18414 %} 18415 ins_pipe( pipe_slow ); 18416 %} 18417 18418 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 18419 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18420 match(Set dst (XorV src1 src2)); 18421 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 18422 ins_encode %{ 18423 int vector_len = 1; 18424 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18425 %} 18426 ins_pipe( pipe_slow ); 18427 %} 18428 18429 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 18430 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18431 match(Set dst (XorV src (LoadVector mem))); 18432 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 18433 ins_encode %{ 18434 int vector_len = 1; 18435 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18436 %} 18437 ins_pipe( pipe_slow ); 18438 %} 18439 18440 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 18441 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18442 match(Set dst (XorV src1 src2)); 18443 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} 18444 ins_encode %{ 18445 int vector_len = 2; 18446 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18447 %} 18448 ins_pipe( pipe_slow ); 18449 %} 18450 18451 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ 18452 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18453 match(Set dst (XorV src (LoadVector mem))); 18454 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (64 bytes)" %} 18455 ins_encode %{ 18456 int vector_len = 2; 18457 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18458 %} 18459 ins_pipe( pipe_slow ); 18460 %} 18461 18462 instruct vcvt4Bto4S_reg(vecD dst, vecS src) %{ 18463 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18464 match(Set dst (VectorCastB2X src)); 18465 format %{ "vpmovsxbw $dst,$src\t! convert 4B to 4S vector" %} 18466 ins_encode %{ 18467 int vector_len = 0; 18468 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18469 %} 18470 ins_pipe( pipe_slow ); 18471 %} 18472 18473 instruct vcvt8Bto8S_reg(vecX dst, vecD src) %{ 18474 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18475 match(Set dst (VectorCastB2X src)); 18476 format %{ "vpmovsxbw $dst,$src\t! convert 8B to 8S vector" %} 18477 ins_encode %{ 18478 int vector_len = 0; 18479 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18480 %} 18481 ins_pipe( pipe_slow ); 18482 %} 18483 18484 instruct vcvt16Bto16S_reg(vecY dst, vecX src) %{ 18485 predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18486 match(Set dst (VectorCastB2X src)); 18487 format %{ "vpmovsxbw $dst,$src\t! convert 16B to 16S vector" %} 18488 ins_encode %{ 18489 int vector_len = 1; 18490 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18491 %} 18492 ins_pipe( pipe_slow ); 18493 %} 18494 18495 instruct vcvt32Bto32S_reg(vecZ dst, vecY src) %{ 18496 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18497 match(Set dst (VectorCastB2X src)); 18498 format %{ "vpmovsxbw $dst,$src\t! 
convert 32B to 32S vector" %} 18499 ins_encode %{ 18500 int vector_len = 2; 18501 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18502 %} 18503 ins_pipe( pipe_slow ); 18504 %} 18505 18506 instruct vcvt4Bto4I_reg(vecX dst, vecS src) %{ 18507 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18508 match(Set dst (VectorCastB2X src)); 18509 format %{ "vpmovsxbd $dst,$src\t! convert 4B to 4I vector" %} 18510 ins_encode %{ 18511 int vector_len = 0; 18512 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18513 %} 18514 ins_pipe( pipe_slow ); 18515 %} 18516 18517 instruct vcvt8Bto8I_reg(vecY dst, vecD src) %{ 18518 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18519 match(Set dst (VectorCastB2X src)); 18520 format %{ "vpmovsxbd $dst,$src\t! convert 8B to 8I vector" %} 18521 ins_encode %{ 18522 int vector_len = 1; 18523 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18524 %} 18525 ins_pipe( pipe_slow ); 18526 %} 18527 18528 instruct vcvt16Bto16I_reg(vecZ dst, vecX src) %{ 18529 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18530 match(Set dst (VectorCastB2X src)); 18531 format %{ "vpmovsxbd $dst,$src\t! convert 16B to 16I vector" %} 18532 ins_encode %{ 18533 int vector_len = 2; 18534 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18535 %} 18536 ins_pipe( pipe_slow ); 18537 %} 18538 18539 instruct vcvt4Bto4L_reg(vecY dst, vecS src) %{ 18540 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18541 match(Set dst (VectorCastB2X src)); 18542 format %{ "vpmovsxbq $dst,$src\t! 
convert 4B to 4L vector" %} 18543 ins_encode %{ 18544 int vector_len = 1; 18545 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18546 %} 18547 ins_pipe( pipe_slow ); 18548 %} 18549 18550 instruct vcvt8Bto8L_reg(vecZ dst, vecD src) %{ 18551 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18552 match(Set dst (VectorCastB2X src)); 18553 format %{ "vpmovsxbq $dst,$src\t! convert 8B to 8L vector" %} 18554 ins_encode %{ 18555 int vector_len = 2; 18556 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18557 %} 18558 ins_pipe( pipe_slow ); 18559 %} 18560 18561 instruct vcvt4Bto4F_reg(vecX dst, vecS src) %{ 18562 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18563 match(Set dst (VectorCastB2X src)); 18564 format %{ "vpmovsxbd $dst,$src\n\t" 18565 "vcvtdq2ps $dst,$dst\t! convert 4B to 4F vector" %} 18566 ins_encode %{ 18567 int vector_len = 0; 18568 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18569 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18570 %} 18571 ins_pipe( pipe_slow ); 18572 %} 18573 18574 instruct vcvt8Bto8F_reg(vecY dst, vecD src) %{ 18575 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18576 match(Set dst (VectorCastB2X src)); 18577 format %{ "vpmovsxbd $dst,$src\n\t" 18578 "vcvtdq2ps $dst,$dst\t! 
convert 8B to 8F vector" %} 18579 ins_encode %{ 18580 int vector_len = 1; 18581 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18582 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18583 %} 18584 ins_pipe( pipe_slow ); 18585 %} 18586 18587 instruct vcvt16Bto16F_reg(vecZ dst, vecX src) %{ 18588 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18589 match(Set dst (VectorCastB2X src)); 18590 format %{ "vpmovsxbd $dst,$src\n\t" 18591 "vcvtdq2ps $dst,$dst\t! convert 16B to 16F vector" %} 18592 ins_encode %{ 18593 int vector_len = 2; 18594 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18595 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18596 %} 18597 ins_pipe( pipe_slow ); 18598 %} 18599 18600 instruct vcvt4Bto4D_reg(vecY dst, vecS src) %{ 18601 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 18602 match(Set dst (VectorCastB2X src)); 18603 format %{ "vpmovsxbd $dst,$src\n\t" 18604 "vcvtdq2pd $dst,$dst\t! convert 4B to 4D vector" %} 18605 ins_encode %{ 18606 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, 0); 18607 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, 1); 18608 %} 18609 ins_pipe( pipe_slow ); 18610 %} 18611 18612 instruct vcvt8Bto8D_reg(vecZ dst, vecD src) %{ 18613 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 18614 match(Set dst (VectorCastB2X src)); 18615 format %{ "vpmovsxbd $dst,$src\n\t" 18616 "vcvtdq2pd $dst,$dst\t! 
convert 8B to 8D vector" %} 18617 ins_encode %{ 18618 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, 1); 18619 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, 2); 18620 %} 18621 ins_pipe( pipe_slow ); 18622 %} 18623 18624 instruct vcvt4Sto4B_reg(vecS dst, vecD src, rRegL scratch) %{ 18625 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18626 effect(TEMP scratch); 18627 match(Set dst (VectorCastS2X src)); 18628 format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" 18629 "vpackuswb $dst,$dst\t! convert 4S to 4B vector" %} 18630 ins_encode %{ 18631 int vector_len = 0; 18632 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 18633 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18634 %} 18635 ins_pipe( pipe_slow ); 18636 %} 18637 18638 instruct vcvt8Sto8B_reg(vecD dst, vecX src, rRegL scratch) %{ 18639 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18640 effect(TEMP scratch); 18641 match(Set dst (VectorCastS2X src)); 18642 format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" 18643 "vpackuswb $dst,$dst\t! 
convert 8S to 8B vector" %} 18644 ins_encode %{ 18645 int vector_len = 0; 18646 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 18647 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18648 %} 18649 ins_pipe( pipe_slow ); 18650 %} 18651 18652 instruct vcvt16Sto16B_reg(vecX dst, vecY src, vecY tmp, rRegL scratch) %{ 18653 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18654 effect(TEMP scratch, TEMP tmp); 18655 match(Set dst (VectorCastS2X src)); 18656 format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" 18657 "vextracti128 $tmp,$dst,0x1\n\t" 18658 "vpackuswb $dst,$dst,$tmp\t! convert 16S to 16B vector" %} 18659 ins_encode %{ 18660 int vector_len = 1; 18661 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 18662 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 18663 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18664 %} 18665 ins_pipe( pipe_slow ); 18666 %} 18667 18668 instruct vcvt32Sto32B_reg(vecY dst, vecZ src) %{ 18669 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18670 match(Set dst (VectorCastS2X src)); 18671 format %{ "evpmovwb $dst,$src\t! convert 32S to 32B vector" %} 18672 ins_encode %{ 18673 int vector_len = 2; 18674 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18675 %} 18676 ins_pipe( pipe_slow ); 18677 %} 18678 18679 instruct vcvt2Sto2I_reg(vecD dst, vecS src) %{ 18680 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18681 match(Set dst (VectorCastS2X src)); 18682 format %{ "vpmovsxwd $dst,$src\t! 
convert 2S to 2I vector" %} 18683 ins_encode %{ 18684 int vector_len = 0; 18685 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18686 %} 18687 ins_pipe( pipe_slow ); 18688 %} 18689 18690 instruct vcvt4Sto4I_reg(vecX dst, vecD src) %{ 18691 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18692 match(Set dst (VectorCastS2X src)); 18693 format %{ "vpmovsxwd $dst,$src\t! convert 4S to 4I vector" %} 18694 ins_encode %{ 18695 int vector_len = 0; 18696 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18697 %} 18698 ins_pipe( pipe_slow ); 18699 %} 18700 18701 instruct vcvt8Sto8I_reg(vecY dst, vecX src) %{ 18702 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18703 match(Set dst (VectorCastS2X src)); 18704 format %{ "vpmovsxwd $dst,$src\t! convert 8S to 8I vector" %} 18705 ins_encode %{ 18706 int vector_len = 1; 18707 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18708 %} 18709 ins_pipe( pipe_slow ); 18710 %} 18711 18712 instruct vcvt16Sto16I_reg(vecZ dst, vecY src) %{ 18713 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18714 match(Set dst (VectorCastS2X src)); 18715 format %{ "vpmovsxwd $dst,$src\t! convert 16S to 16I vector" %} 18716 ins_encode %{ 18717 int vector_len = 2; 18718 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18719 %} 18720 ins_pipe( pipe_slow ); 18721 %} 18722 18723 instruct vcvt2Sto2L_reg(vecX dst, vecS src) %{ 18724 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18725 match(Set dst (VectorCastS2X src)); 18726 format %{ "vpmovsxwq $dst,$src\t! 
convert 2S to 2L vector" %} 18727 ins_encode %{ 18728 int vector_len = 0; 18729 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18730 %} 18731 ins_pipe( pipe_slow ); 18732 %} 18733 18734 instruct vcvt4Sto4L_reg(vecY dst, vecD src) %{ 18735 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18736 match(Set dst (VectorCastS2X src)); 18737 format %{ "vpmovsxwq $dst,$src\t! convert 4S to 4L vector" %} 18738 ins_encode %{ 18739 int vector_len = 1; 18740 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18741 %} 18742 ins_pipe( pipe_slow ); 18743 %} 18744 18745 instruct vcvt8Sto8L_reg(vecZ dst, vecX src) %{ 18746 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18747 match(Set dst (VectorCastS2X src)); 18748 format %{ "vpmovsxwq $dst,$src\t! convert 8S to 8L vector" %} 18749 ins_encode %{ 18750 int vector_len = 2; 18751 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18752 %} 18753 ins_pipe( pipe_slow ); 18754 %} 18755 18756 instruct vcvt2Sto2F_reg(vecD dst, vecS src) %{ 18757 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18758 match(Set dst (VectorCastS2X src)); 18759 format %{ "vpmovsxwd $dst,$src\n\t" 18760 "vcvtdq2ps $dst,$dst\t! convert 2S to 2F vector" %} 18761 ins_encode %{ 18762 int vector_len = 0; 18763 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18764 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18765 %} 18766 ins_pipe( pipe_slow ); 18767 %} 18768 18769 instruct vcvt4Sto4F_reg(vecX dst, vecD src) %{ 18770 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18771 match(Set dst (VectorCastS2X src)); 18772 format %{ "vpmovsxwd $dst,$src\n\t" 18773 "vcvtdq2ps $dst,$dst\t! 
convert 4S to 4F vector" %} 18774 ins_encode %{ 18775 int vector_len = 0; 18776 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18777 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18778 %} 18779 ins_pipe( pipe_slow ); 18780 %} 18781 18782 instruct vcvt8Sto8F_reg(vecY dst, vecX src) %{ 18783 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18784 match(Set dst (VectorCastS2X src)); 18785 format %{ "vpmovsxwd $dst,$src\n\t" 18786 "vcvtdq2ps $dst,$dst\t! convert 8S to 8F vector" %} 18787 ins_encode %{ 18788 int vector_len = 1; 18789 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18790 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18791 %} 18792 ins_pipe( pipe_slow ); 18793 %} 18794 18795 instruct vcvt16Sto16F_reg(vecZ dst, vecY src) %{ 18796 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18797 match(Set dst (VectorCastS2X src)); 18798 format %{ "vpmovsxwd $dst,$src\n\t" 18799 "vcvtdq2ps $dst,$dst\t! convert 16S to 16F vector" %} 18800 ins_encode %{ 18801 int vector_len = 2; 18802 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18803 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18804 %} 18805 ins_pipe( pipe_slow ); 18806 %} 18807 18808 instruct vcvt2Sto2D_reg(vecX dst, vecS src) %{ 18809 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 18810 match(Set dst (VectorCastS2X src)); 18811 format %{ "vpmovsxwd $dst,$src\n\t" 18812 "vcvtdq2pd $dst,$dst\t! 
convert 2S to 2D vector" %} 18813 ins_encode %{ 18814 int vector_len = 0; 18815 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18816 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18817 %} 18818 ins_pipe( pipe_slow ); 18819 %} 18820 18821 instruct vcvt4Sto4D_reg(vecY dst, vecD src) %{ 18822 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 18823 match(Set dst (VectorCastS2X src)); 18824 format %{ "vpmovsxwd $dst,$src\n\t" 18825 "vcvtdq2pd $dst,$dst\t! convert 4S to 4D vector" %} 18826 ins_encode %{ 18827 int vector_len = 1; 18828 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18829 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18830 %} 18831 ins_pipe( pipe_slow ); 18832 %} 18833 18834 instruct vcvt8Sto8D_reg(vecZ dst, vecX src) %{ 18835 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 18836 match(Set dst (VectorCastS2X src)); 18837 format %{ "vpmovsxwd $dst,$src\n\t" 18838 "vcvtdq2pd $dst,$dst\t! convert 8S to 8D vector" %} 18839 ins_encode %{ 18840 int vector_len = 2; 18841 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18842 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18843 %} 18844 ins_pipe( pipe_slow ); 18845 %} 18846 18847 instruct vcvt4Ito4B_reg(vecS dst, vecX src, rRegL scratch) %{ 18848 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18849 effect(TEMP scratch); 18850 match(Set dst (VectorCastI2X src)); 18851 format %{ "vpand $dst,$src,[0x000000FF000000FF]\n\t" 18852 "vpackusdw $dst,$dst\n\t" 18853 "vpackuswb $dst,$dst\t! 
convert 4I to 4B vector" %} 18854 ins_encode %{ 18855 int vector_len = 0; 18856 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register); 18857 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18858 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18859 %} 18860 ins_pipe( pipe_slow ); 18861 %} 18862 18863 instruct vcvt8Ito8B_reg(vecD dst, vecY src, vecY tmp, rRegL scratch) %{ 18864 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18865 effect(TEMP scratch, TEMP tmp); 18866 match(Set dst (VectorCastI2X src)); 18867 format %{ "vpand $dst,$src,[0x000000FF000000FF]\n\t" 18868 "vextracti128 $tmp,$dst,0x1\n\t" 18869 "vpackusdw $dst,$dst,$tmp\n\t" 18870 "vpackuswb $dst,$dst\t! convert 8I to 8B vector" %} 18871 ins_encode %{ 18872 int vector_len = 1; 18873 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register); 18874 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 18875 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18876 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 18877 %} 18878 ins_pipe( pipe_slow ); 18879 %} 18880 18881 instruct vcvt16Ito16B_reg(vecX dst, vecZ src) %{ 18882 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18883 match(Set dst (VectorCastI2X src)); 18884 format %{ "evpmovdb $dst,$src\t! 
convert 16I to 16B vector" %} 18885 ins_encode %{ 18886 int vector_len = 2; 18887 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18888 %} 18889 ins_pipe( pipe_slow ); 18890 %} 18891 18892 instruct vcvt2Ito2S_reg(vecS dst, vecD src, rRegL scratch) %{ 18893 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18894 effect(TEMP scratch); 18895 match(Set dst (VectorCastI2X src)); 18896 format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" 18897 "vpackusdw $dst,$dst\t! convert 2I to 2S vector" %} 18898 ins_encode %{ 18899 int vector_len = 0; 18900 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 18901 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18902 %} 18903 ins_pipe( pipe_slow ); 18904 %} 18905 18906 instruct vcvt4Ito4S_reg(vecD dst, vecX src, rRegL scratch) %{ 18907 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18908 effect(TEMP scratch); 18909 match(Set dst (VectorCastI2X src)); 18910 format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" 18911 "vpackusdw $dst,$dst\t! 
convert 4I to 4S vector" %} 18912 ins_encode %{ 18913 int vector_len = 0; 18914 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 18915 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18916 %} 18917 ins_pipe( pipe_slow ); 18918 %} 18919 18920 instruct vcvt8Ito8S_reg(vecX dst, vecY src, vecY tmp, rRegL scratch) %{ 18921 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18922 effect(TEMP scratch, TEMP tmp); 18923 match(Set dst (VectorCastI2X src)); 18924 format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t" 18925 "vextracti128 $tmp,$dst,0x1\n\t" 18926 "vpackusdw $dst,$dst,$tmp\t! convert 8I to 8S vector" %} 18927 ins_encode %{ 18928 int vector_len = 1; 18929 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 18930 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 18931 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18932 %} 18933 ins_pipe( pipe_slow ); 18934 %} 18935 18936 instruct vcvt16Ito16S_reg(vecY dst, vecZ src) %{ 18937 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18938 match(Set dst (VectorCastI2X src)); 18939 format %{ "evpmovdw $dst,$src\t! convert 16I to 16S vector" %} 18940 ins_encode %{ 18941 int vector_len = 2; 18942 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18943 %} 18944 ins_pipe( pipe_slow ); 18945 %} 18946 18947 instruct vcvt2Ito2L_reg(vecX dst, vecD src) %{ 18948 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18949 match(Set dst (VectorCastI2X src)); 18950 format %{ "vpmovsxdq $dst,$src\t! 
convert 2I to 2L vector" %} 18951 ins_encode %{ 18952 int vector_len = 0; 18953 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18954 %} 18955 ins_pipe( pipe_slow ); 18956 %} 18957 18958 instruct vcvt4Ito4L_reg(vecY dst, vecX src) %{ 18959 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18960 match(Set dst (VectorCastI2X src)); 18961 format %{ "vpmovsxdq $dst,$src\t! convert 4I to 4L vector" %} 18962 ins_encode %{ 18963 int vector_len = 1; 18964 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18965 %} 18966 ins_pipe( pipe_slow ); 18967 %} 18968 18969 instruct vcvt8Ito8L_reg(vecZ dst, vecY src) %{ 18970 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18971 match(Set dst (VectorCastI2X src)); 18972 format %{ "vpmovsxdq $dst,$src\t! convert 8I to 8L vector" %} 18973 ins_encode %{ 18974 int vector_len = 2; 18975 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18976 %} 18977 ins_pipe( pipe_slow ); 18978 %} 18979 18980 instruct vcvt2Ito2F_reg(vecD dst, vecD src) %{ 18981 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18982 match(Set dst (VectorCastI2X src)); 18983 format %{ "vcvtdq2ps $dst,$src\t! convert 2I to 2F vector" %} 18984 ins_encode %{ 18985 int vector_len = 0; 18986 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18987 %} 18988 ins_pipe( pipe_slow ); 18989 %} 18990 18991 instruct vcvt4Ito4F_reg(vecX dst, vecX src) %{ 18992 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18993 match(Set dst (VectorCastI2X src)); 18994 format %{ "vcvtdq2ps $dst,$src\t! 
convert 4I to 4F vector" %} 18995 ins_encode %{ 18996 int vector_len = 0; 18997 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18998 %} 18999 ins_pipe( pipe_slow ); 19000 %} 19001 19002 instruct vcvt8Ito8F_reg(vecY dst, vecY src) %{ 19003 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19004 match(Set dst (VectorCastI2X src)); 19005 format %{ "vcvtdq2ps $dst,$src\t! convert 8I to 8F vector" %} 19006 ins_encode %{ 19007 int vector_len = 1; 19008 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19009 %} 19010 ins_pipe( pipe_slow ); 19011 %} 19012 19013 instruct vcvt16Ito16F_reg(vecY dst, vecY src) %{ 19014 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19015 match(Set dst (VectorCastI2X src)); 19016 format %{ "vcvtdq2ps $dst,$src\t! convert 16I to 16F vector" %} 19017 ins_encode %{ 19018 int vector_len = 2; 19019 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19020 %} 19021 ins_pipe( pipe_slow ); 19022 %} 19023 19024 instruct vcvt2Ito2D_reg(vecX dst, vecD src) %{ 19025 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19026 match(Set dst (VectorCastI2X src)); 19027 format %{ "vcvtdq2pd $dst,$src\t! convert 2I to 2D vector" %} 19028 ins_encode %{ 19029 int vector_len = 0; 19030 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19031 %} 19032 ins_pipe( pipe_slow ); 19033 %} 19034 19035 instruct vcvt4Ito4D_reg(vecY dst, vecX src) %{ 19036 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19037 match(Set dst (VectorCastI2X src)); 19038 format %{ "vcvtdq2pd $dst,$src\t! 
convert 4I to 4D vector" %} 19039 ins_encode %{ 19040 int vector_len = 1; 19041 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19042 %} 19043 ins_pipe( pipe_slow ); 19044 %} 19045 19046 instruct vcvt8Ito8D_reg(vecZ dst, vecY src) %{ 19047 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19048 match(Set dst (VectorCastI2X src)); 19049 format %{ "vcvtdq2pd $dst,$src\t! convert 8I to 8D vector" %} 19050 ins_encode %{ 19051 int vector_len = 2; 19052 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19053 %} 19054 ins_pipe( pipe_slow ); 19055 %} 19056 19057 instruct vcvt4Lto4B_reg(vecS dst, vecY src, rRegL scratch) %{ 19058 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 19059 match(Set dst (VectorCastL2X src)); 19060 effect(TEMP scratch); 19061 format %{ "vpermilps $dst,$src,8\n\t" 19062 "vpermpd $dst,$dst,8\n\t" 19063 "vpand $dst,$dst,[0x000000FF000000FF]\n\t" 19064 "vpackusdw $dst,$dst\n\t" 19065 "vpackuswb $dst,$dst\t! convert 4L to 4B vector" %} 19066 ins_encode %{ 19067 int vector_len = 1; 19068 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); 19069 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len); 19070 // Since cast to int has been done, do rest of operations in 128. 
19071 vector_len = 0; 19072 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register); 19073 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 19074 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 19075 %} 19076 ins_pipe( pipe_slow ); 19077 %} 19078 19079 instruct vcvt8Lto8B_reg(vecD dst, vecZ src) %{ 19080 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 19081 match(Set dst (VectorCastL2X src)); 19082 format %{ "evpmovqb $dst,$src\t! convert 8L to 8B vector" %} 19083 ins_encode %{ 19084 int vector_len = 2; 19085 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19086 %} 19087 ins_pipe( pipe_slow ); 19088 %} 19089 19090 instruct vcvt2Lto2S_reg(vecS dst, vecX src, rRegL scratch) %{ 19091 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 19092 match(Set dst (VectorCastL2X src)); 19093 effect(TEMP scratch); 19094 format %{ "vpshufd $dst,$src,8\n\t" 19095 "vpand $dst,$dst,[0x0000FFFF0000FFFF]\n\t" 19096 "vpackusdw $dst,$dst\t! 
convert 2L to 2S vector" %} 19097 ins_encode %{ 19098 int vector_len = 0; 19099 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); 19100 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 19101 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 19102 %} 19103 ins_pipe( pipe_slow ); 19104 %} 19105 19106 instruct vcvt4Lto4S_reg(vecD dst, vecY src, rRegL scratch) %{ 19107 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 19108 match(Set dst (VectorCastL2X src)); 19109 effect(TEMP scratch); 19110 format %{ "vpermilps $dst,$src,8\n\t" 19111 "vpermpd $dst,$dst,8\n\t" 19112 "vpand $dst,$dst,[0x0000FFFF0000FFFF]\n\t" 19113 "vpackusdw $dst,$dst\t! convert 4L to 4S vector" %} 19114 ins_encode %{ 19115 int vector_len = 1; 19116 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); 19117 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len); 19118 // Since cast to int has been done, do rest of operations in 128. 19119 vector_len = 0; 19120 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register); 19121 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 19122 %} 19123 ins_pipe( pipe_slow ); 19124 %} 19125 19126 instruct vcvt8Lto8S_reg(vecX dst, vecZ src) %{ 19127 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 19128 match(Set dst (VectorCastL2X src)); 19129 format %{ "evpmovqw $dst,$src\t! 
convert 8L to 8S vector" %} 19130 ins_encode %{ 19131 int vector_len = 2; 19132 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19133 %} 19134 ins_pipe( pipe_slow ); 19135 %} 19136 19137 instruct vcvt1Lto1I_reg(vecS dst, vecD src) %{ 19138 predicate(n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 19139 match(Set dst (VectorCastL2X src)); 19140 format %{ "movdqu $dst,$src\t! convert 1L to 1I vector" %} 19141 ins_encode %{ 19142 // If register is the same, then move is not needed. 19143 if ($dst$$XMMRegister != $src$$XMMRegister) { 19144 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 19145 } 19146 %} 19147 ins_pipe( pipe_slow ); 19148 %} 19149 19150 instruct vcvt2Lto2I_reg(vecD dst, vecX src) %{ 19151 predicate(UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 19152 match(Set dst (VectorCastL2X src)); 19153 format %{ "pshufd $dst,$src,8\t! convert 2L to 2I vector" %} 19154 ins_encode %{ 19155 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 19156 %} 19157 ins_pipe( pipe_slow ); 19158 %} 19159 19160 instruct vcvt2Lto2I_reg_avx(vecD dst, vecX src) %{ 19161 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 19162 match(Set dst (VectorCastL2X src)); 19163 format %{ "vpshufd $dst,$src,8\t! convert 2L to 2I vector" %} 19164 ins_encode %{ 19165 int vector_len = 0; 19166 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); 19167 %} 19168 ins_pipe( pipe_slow ); 19169 %} 19170 19171 instruct vcvt4Lto4I_reg(vecX dst, vecY src) %{ 19172 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 19173 match(Set dst (VectorCastL2X src)); 19174 format %{ "vpermilps $dst,$src,8\n\t" 19175 "vpermpd $dst,$dst,8\t! 
convert 4L to 4I vector" %} 19176 ins_encode %{ 19177 int vector_len = 1; 19178 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len); 19179 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len); 19180 %} 19181 ins_pipe( pipe_slow ); 19182 %} 19183 19184 instruct vcvt8Lto8I_reg(vecY dst, vecZ src) %{ 19185 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 19186 match(Set dst (VectorCastL2X src)); 19187 format %{ "evpmovqd $dst,$src\t! convert 8L to 8I vector" %} 19188 ins_encode %{ 19189 int vector_len = 2; 19190 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19191 %} 19192 ins_pipe( pipe_slow ); 19193 %} 19194 19195 instruct vcvt2Lto2F_reg(vecD dst, vecX src) %{ 19196 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19197 match(Set dst (VectorCastL2X src)); 19198 format %{ "vcvtqq2ps $dst,$src\t! convert 2L to 2F vector" %} 19199 ins_encode %{ 19200 int vector_len = 0; 19201 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19202 %} 19203 ins_pipe( pipe_slow ); 19204 %} 19205 19206 instruct vcvt4Lto4F_reg(vecX dst, vecY src) %{ 19207 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19208 match(Set dst (VectorCastL2X src)); 19209 format %{ "vcvtqq2ps $dst,$src\t! 
convert 4L to 4F vector" %} 19210 ins_encode %{ 19211 int vector_len = 1; 19212 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19213 %} 19214 ins_pipe( pipe_slow ); 19215 %} 19216 19217 instruct vcvt8Lto8F_reg(vecY dst, vecZ src) %{ 19218 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19219 match(Set dst (VectorCastL2X src)); 19220 format %{ "vcvtqq2ps $dst,$src\t! convert 8L to 8F vector" %} 19221 ins_encode %{ 19222 int vector_len = 2; 19223 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19224 %} 19225 ins_pipe( pipe_slow ); 19226 %} 19227 19228 instruct vcvt1Lto1D_reg(vecD dst, vecD src) %{ 19229 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19230 match(Set dst (VectorCastL2X src)); 19231 format %{ "vcvtqq2pd $dst,$src\t! convert 1L to 1D vector" %} 19232 ins_encode %{ 19233 int vector_len = 0; 19234 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19235 %} 19236 ins_pipe( pipe_slow ); 19237 %} 19238 19239 instruct vcvt2Lto2D_reg(vecX dst, vecX src) %{ 19240 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19241 match(Set dst (VectorCastL2X src)); 19242 format %{ "vcvtqq2pd $dst,$src\t! convert 2L to 2D vector" %} 19243 ins_encode %{ 19244 int vector_len = 0; 19245 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19246 %} 19247 ins_pipe( pipe_slow ); 19248 %} 19249 19250 instruct vcvt4Lto4D_reg(vecY dst, vecY src) %{ 19251 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19252 match(Set dst (VectorCastL2X src)); 19253 format %{ "vcvtqq2pd $dst,$src\t! 
convert 4L to 4D vector" %} 19254 ins_encode %{ 19255 int vector_len = 1; 19256 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19257 %} 19258 ins_pipe( pipe_slow ); 19259 %} 19260 19261 instruct vcvt8Lto8D_reg(vecZ dst, vecZ src) %{ 19262 predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19263 match(Set dst (VectorCastL2X src)); 19264 format %{ "vcvtqq2pd $dst,$src\t! convert 8L to 8D vector" %} 19265 ins_encode %{ 19266 int vector_len = 2; 19267 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19268 %} 19269 ins_pipe( pipe_slow ); 19270 %} 19271 19272 instruct vcvt2Fto2D_reg(vecX dst, vecD src) %{ 19273 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19274 match(Set dst (VectorCastF2X src)); 19275 format %{ "vcvtps2pd $dst,$src\t! convert 2F to 2D vector" %} 19276 ins_encode %{ 19277 int vector_len = 0; 19278 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19279 %} 19280 ins_pipe( pipe_slow ); 19281 %} 19282 19283 instruct vcvt4Fto4D_reg(vecY dst, vecX src) %{ 19284 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19285 match(Set dst (VectorCastF2X src)); 19286 format %{ "vcvtps2pd $dst,$src\t! convert 4F to 4D vector" %} 19287 ins_encode %{ 19288 int vector_len = 1; 19289 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19290 %} 19291 ins_pipe( pipe_slow ); 19292 %} 19293 19294 instruct vcvt8Fto8D_reg(vecZ dst, vecY src) %{ 19295 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19296 match(Set dst (VectorCastF2X src)); 19297 format %{ "evcvtps2pd $dst,$src\t! 
convert 8F to 8D vector" %} 19298 ins_encode %{ 19299 int vector_len = 2; 19300 __ evcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19301 %} 19302 ins_pipe( pipe_slow ); 19303 %} 19304 19305 instruct vcvt2Dto2F_reg(vecD dst, vecX src) %{ 19306 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19307 match(Set dst (VectorCastD2X src)); 19308 format %{ "vcvtpd2ps $dst,$src\t! convert 2D to 2F vector" %} 19309 ins_encode %{ 19310 int vector_len = 0; 19311 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19312 %} 19313 ins_pipe( pipe_slow ); 19314 %} 19315 19316 instruct vcvt4Dto4F_reg(vecX dst, vecY src) %{ 19317 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19318 match(Set dst (VectorCastD2X src)); 19319 format %{ "vcvtpd2ps $dst,$src\t! convert 4D to 4F vector" %} 19320 ins_encode %{ 19321 int vector_len = 1; 19322 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19323 %} 19324 ins_pipe( pipe_slow ); 19325 %} 19326 19327 instruct vcvt8Dto8F_reg(vecY dst, vecZ src) %{ 19328 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19329 match(Set dst (VectorCastD2X src)); 19330 format %{ "evcvtpd2ps $dst,$src\t! convert 8D to 8F vector" %} 19331 ins_encode %{ 19332 int vector_len = 2; 19333 __ evcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len); 19334 %} 19335 ins_pipe( pipe_slow ); 19336 %} 19337 19338 instruct vcmpeq2F(vecD dst, vecD src1, vecD src2) %{ 19339 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 19340 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 19341 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19342 match(Set dst (VectorMaskCmp src1 src2)); 19343 format %{ "vcmpeqps $dst,$src1,$src2\t! 
cmpeq packed2F" %} 19344 ins_encode %{ 19345 int vector_len = 0; 19346 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 19347 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19348 %} 19349 ins_pipe( pipe_slow ); 19350 %} 19351 19352 instruct vcmpeq4F(vecX dst, vecX src1, vecX src2) %{ 19353 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 19354 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 19355 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19356 match(Set dst (VectorMaskCmp src1 src2)); 19357 format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed4F" %} 19358 ins_encode %{ 19359 int vector_len = 0; 19360 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 19361 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19362 %} 19363 ins_pipe( pipe_slow ); 19364 %} 19365 19366 instruct vcmpeq8F(vecY dst, vecY src1, vecY src2) %{ 19367 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 19368 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 19369 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19370 match(Set dst (VectorMaskCmp src1 src2)); 19371 format %{ "vcmpeqps $dst,$src1,$src2\t! 
cmpeq packed8F" %} 19372 ins_encode %{ 19373 int vector_len = 1; 19374 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 19375 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19376 %} 19377 ins_pipe( pipe_slow ); 19378 %} 19379 19380 instruct vcmpeq16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 19381 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 19382 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 19383 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19384 match(Set dst (VectorMaskCmp src1 src2)); 19385 effect(TEMP dst, TEMP scratch); 19386 format %{ "vcmpeqps k2,$src1,$src2\n\t" 19387 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16F" %} 19388 ins_encode %{ 19389 int vector_len = 2; 19390 Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling 19391 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 19392 KRegister mask = k0; // The comparison itself is not being masked. 19393 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19394 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 19395 %} 19396 ins_pipe( pipe_slow ); 19397 %} 19398 19399 instruct vcmplt2F(vecD dst, vecD src1, vecD src2) %{ 19400 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 19401 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 19402 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19403 match(Set dst (VectorMaskCmp src1 src2)); 19404 format %{ "vcmpltps $dst,$src1,$src2\t! 
cmplt packed2F" %} 19405 ins_encode %{ 19406 int vector_len = 0; 19407 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 19408 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19409 %} 19410 ins_pipe( pipe_slow ); 19411 %} 19412 19413 instruct vcmplt4F(vecX dst, vecX src1, vecX src2) %{ 19414 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 19415 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 19416 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19417 match(Set dst (VectorMaskCmp src1 src2)); 19418 format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed4F" %} 19419 ins_encode %{ 19420 int vector_len = 0; 19421 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 19422 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19423 %} 19424 ins_pipe( pipe_slow ); 19425 %} 19426 19427 instruct vcmplt8F(vecY dst, vecY src1, vecY src2) %{ 19428 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 19429 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 19430 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19431 match(Set dst (VectorMaskCmp src1 src2)); 19432 format %{ "vcmpltps $dst,$src1,$src2\t! 
cmplt packed8F" %} 19433 ins_encode %{ 19434 int vector_len = 1; 19435 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 19436 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19437 %} 19438 ins_pipe( pipe_slow ); 19439 %} 19440 19441 instruct vcmplt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 19442 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 19443 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 19444 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19445 match(Set dst (VectorMaskCmp src1 src2)); 19446 effect(TEMP dst, TEMP scratch); 19447 format %{ "vcmpltps k2,$src1,$src2\n\t" 19448 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16F" %} 19449 ins_encode %{ 19450 int vector_len = 2; 19451 Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling 19452 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 19453 KRegister mask = k0; // The comparison itself is not being masked. 19454 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19455 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 19456 %} 19457 ins_pipe( pipe_slow ); 19458 %} 19459 19460 instruct vcmpgt2F(vecD dst, vecD src1, vecD src2) %{ 19461 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 19462 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 19463 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19464 match(Set dst (VectorMaskCmp src1 src2)); 19465 format %{ "vcmpgtps $dst,$src1,$src2\t! 
cmpgt packed2F" %} 19466 ins_encode %{ 19467 int vector_len = 0; 19468 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 19469 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19470 %} 19471 ins_pipe( pipe_slow ); 19472 %} 19473 19474 instruct vcmpgt4F(vecX dst, vecX src1, vecX src2) %{ 19475 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 19476 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 19477 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19478 match(Set dst (VectorMaskCmp src1 src2)); 19479 format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed4F" %} 19480 ins_encode %{ 19481 int vector_len = 0; 19482 Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling 19483 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19484 %} 19485 ins_pipe( pipe_slow ); 19486 %} 19487 19488 instruct vcmpgt8F(vecY dst, vecY src1, vecY src2) %{ 19489 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 19490 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 19491 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19492 match(Set dst (VectorMaskCmp src1 src2)); 19493 format %{ "vcmpgtps $dst,$src1,$src2\t! 
cmpgt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit float compares: the EVEX compare writes per-lane mask bits into a
// hardcoded k-register, which is then expanded to all-ones lanes in dst via a
// masked load of vector_all_bits_set (no k-register allocation support yet).
instruct vcmpgt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgtps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgeps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpleps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed2F" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed4F" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed8F" %}
  ins_encode %{
    int vector_len = 1;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpneps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed16F" %}
  ins_encode %{
    int vector_len = 2;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqpd $dst,$src1,$src2\t! 
cmpeq packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Packed-double compares: same scheme as the float variants above, using
// vcmppd/evcmppd and the 64-bit masked move to materialize the lane mask.
instruct vcmpeq2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpeqpd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpltpd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgtpd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgepd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmplepd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed1D" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed2D" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed4D" %}
  ins_encode %{
    int vector_len = 1;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpnepd k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed8D" %}
  ins_encode %{
    int vector_len = 2;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\n\t! 
cmpeq packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Packed-int compares: lt/ge have no direct SSE/AVX predicate, so lt is
// encoded as gt with the operands swapped, and ge as swapped-gt followed by
// a bitwise NOT (xor with all-ones).
instruct vcmpeq4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\n\t! cmpeq packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    // src1 < src2 is emitted as src2 > src1 (operands swapped).
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  // Format corrected: this rule emits an lt compare (Assembler::lt below),
  // not the nle/gt text it previously showed (copy-paste from vcmpgt16I).
  format %{ "vpcmpltd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnled k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2I" %}
  ins_encode %{
    int vector_len = 0;
    // ge == NOT(src2 > src1): swapped gt, then invert all bits.
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
20313 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20314 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20315 %} 20316 ins_pipe( pipe_slow ); 20317 %} 20318 20319 instruct vcmple2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 20320 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 20321 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 20322 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20323 match(Set dst (VectorMaskCmp src1 src2)); 20324 effect(TEMP scratch); 20325 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 20326 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2I" %} 20327 ins_encode %{ 20328 int vector_len = 0; 20329 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20330 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20331 %} 20332 ins_pipe( pipe_slow ); 20333 %} 20334 20335 instruct vcmple4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 20336 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 20337 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 20338 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20339 match(Set dst (VectorMaskCmp src1 src2)); 20340 effect(TEMP scratch); 20341 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 20342 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed4I" %} 20343 ins_encode %{ 20344 int vector_len = 0; 20345 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20346 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20347 %} 20348 ins_pipe( pipe_slow ); 20349 %} 20350 20351 instruct vcmple8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 20352 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 20353 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 20354 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20355 match(Set dst (VectorMaskCmp src1 src2)); 20356 effect(TEMP scratch); 20357 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 20358 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8I" %} 20359 ins_encode %{ 20360 int vector_len = 1; 20361 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20362 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20363 %} 20364 ins_pipe( pipe_slow ); 20365 %} 20366 20367 instruct vcmple16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20368 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 20369 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 20370 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20371 match(Set dst (VectorMaskCmp src1 src2)); 20372 effect(TEMP dst, TEMP scratch); 20373 format %{ "vpcmpled k2,$src1,$src2\n\t" 20374 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16I" %} 20375 ins_encode %{ 20376 int vector_len = 2; 20377 Assembler::ComparisonPredicate cmp = Assembler::le; 20378 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20379 KRegister mask = k0; // The comparison itself is not being masked. 
20380 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20381 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20382 %} 20383 ins_pipe( pipe_slow ); 20384 %} 20385 20386 instruct vcmpne2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 20387 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 20388 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 20389 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20390 match(Set dst (VectorMaskCmp src1 src2)); 20391 effect(TEMP scratch); 20392 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 20393 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2I" %} 20394 ins_encode %{ 20395 int vector_len = 0; 20396 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20397 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20398 %} 20399 ins_pipe( pipe_slow ); 20400 %} 20401 20402 instruct vcmpne4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 20403 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 20404 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 20405 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20406 match(Set dst (VectorMaskCmp src1 src2)); 20407 effect(TEMP scratch); 20408 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 20409 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed4I" %} 20410 ins_encode %{ 20411 int vector_len = 0; 20412 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20413 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20414 %} 20415 ins_pipe( pipe_slow ); 20416 %} 20417 20418 instruct vcmpne8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 20419 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 20420 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 20421 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20422 match(Set dst (VectorMaskCmp src1 src2)); 20423 effect(TEMP scratch); 20424 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 20425 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8I" %} 20426 ins_encode %{ 20427 int vector_len = 1; 20428 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20429 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20430 %} 20431 ins_pipe( pipe_slow ); 20432 %} 20433 20434 instruct vcmpne16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20435 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 20436 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 20437 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20438 match(Set dst (VectorMaskCmp src1 src2)); 20439 effect(TEMP dst, TEMP scratch); 20440 format %{ "vpcmpneqd k2,$src1,$src2\n\t" 20441 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed16I" %} 20442 ins_encode %{ 20443 int vector_len = 2; 20444 Assembler::ComparisonPredicate cmp = Assembler::neq; 20445 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20446 KRegister mask = k0; // The comparison itself is not being masked. 
20447 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20448 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20449 %} 20450 ins_pipe( pipe_slow ); 20451 %} 20452 20453 instruct vcmpeq8B(vecD dst, vecD src1, vecD src2) %{ 20454 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 20455 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 20456 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20457 match(Set dst (VectorMaskCmp src1 src2)); 20458 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed8B" %} 20459 ins_encode %{ 20460 int vector_len = 0; 20461 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20462 %} 20463 ins_pipe( pipe_slow ); 20464 %} 20465 20466 instruct vcmpeq16B(vecX dst, vecX src1, vecX src2) %{ 20467 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 20468 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 20469 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20470 match(Set dst (VectorMaskCmp src1 src2)); 20471 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed16B" %} 20472 ins_encode %{ 20473 int vector_len = 0; 20474 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20475 %} 20476 ins_pipe( pipe_slow ); 20477 %} 20478 20479 instruct vcmpeq32B(vecY dst, vecY src1, vecY src2) %{ 20480 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 20481 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 20482 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20483 match(Set dst (VectorMaskCmp src1 src2)); 20484 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! 
cmpeq packed32B" %} 20485 ins_encode %{ 20486 int vector_len = 1; 20487 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20488 %} 20489 ins_pipe( pipe_slow ); 20490 %} 20491 20492 instruct vcmpeq64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20493 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 20494 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 20495 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20496 match(Set dst (VectorMaskCmp src1 src2)); 20497 effect(TEMP dst, TEMP scratch); 20498 format %{ "vpcmpeqb k2,$src1,$src2\n\t" 20499 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed64B" %} 20500 ins_encode %{ 20501 int vector_len = 2; 20502 Assembler::ComparisonPredicate cmp = Assembler::eq; 20503 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20504 KRegister mask = k0; // The comparison itself is not being masked. 20505 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20506 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20507 %} 20508 ins_pipe( pipe_slow ); 20509 %} 20510 20511 instruct vcmplt8B(vecD dst, vecD src1, vecD src2) %{ 20512 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 20513 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 20514 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20515 match(Set dst (VectorMaskCmp src1 src2)); 20516 format %{ "vpcmpgtb $dst,$src2,$src1\t! 
cmplt packed8B" %} 20517 ins_encode %{ 20518 int vector_len = 0; 20519 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20520 %} 20521 ins_pipe( pipe_slow ); 20522 %} 20523 20524 instruct vcmplt16B(vecX dst, vecX src1, vecX src2) %{ 20525 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 20526 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 20527 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20528 match(Set dst (VectorMaskCmp src1 src2)); 20529 format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed16B" %} 20530 ins_encode %{ 20531 int vector_len = 0; 20532 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20533 %} 20534 ins_pipe( pipe_slow ); 20535 %} 20536 20537 instruct vcmplt32B(vecY dst, vecY src1, vecY src2) %{ 20538 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 20539 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 20540 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20541 match(Set dst (VectorMaskCmp src1 src2)); 20542 format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed32B" %} 20543 ins_encode %{ 20544 int vector_len = 1; 20545 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20546 %} 20547 ins_pipe( pipe_slow ); 20548 %} 20549 20550 instruct vcmplt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20551 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 20552 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 20553 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20554 match(Set dst (VectorMaskCmp src1 src2)); 20555 effect(TEMP dst, TEMP scratch); 20556 format %{ "vpcmpnleb k2,$src1,$src2\n\t" 20557 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpgt packed64B" %} 20558 ins_encode %{ 20559 int vector_len = 2; 20560 Assembler::ComparisonPredicate cmp = Assembler::lt; 20561 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20562 KRegister mask = k0; // The comparison itself is not being masked. 20563 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20564 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20565 %} 20566 ins_pipe( pipe_slow ); 20567 %} 20568 20569 instruct vcmpgt8B(vecD dst, vecD src1, vecD src2) %{ 20570 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 20571 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 20572 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20573 match(Set dst (VectorMaskCmp src1 src2)); 20574 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed8B" %} 20575 ins_encode %{ 20576 int vector_len = 0; 20577 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20578 %} 20579 ins_pipe( pipe_slow ); 20580 %} 20581 20582 instruct vcmpgt16B(vecX dst, vecX src1, vecX src2) %{ 20583 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 20584 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 20585 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20586 match(Set dst (VectorMaskCmp src1 src2)); 20587 format %{ "vpcmpgtb $dst,$src1,$src2\t! 
cmpgt packed16B" %} 20588 ins_encode %{ 20589 int vector_len = 0; 20590 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20591 %} 20592 ins_pipe( pipe_slow ); 20593 %} 20594 20595 instruct vcmpgt32B(vecY dst, vecY src1, vecY src2) %{ 20596 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 20597 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 20598 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20599 match(Set dst (VectorMaskCmp src1 src2)); 20600 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed32B" %} 20601 ins_encode %{ 20602 int vector_len = 1; 20603 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20604 %} 20605 ins_pipe( pipe_slow ); 20606 %} 20607 20608 instruct vcmpgt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20609 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 20610 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 20611 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20612 match(Set dst (VectorMaskCmp src1 src2)); 20613 effect(TEMP dst, TEMP scratch); 20614 format %{ "vpcmpnleb k2,$src1,$src2\n\t" 20615 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed64B" %} 20616 ins_encode %{ 20617 int vector_len = 2; 20618 Assembler::ComparisonPredicate cmp = Assembler::nle; 20619 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20620 KRegister mask = k0; // The comparison itself is not being masked. 
20621 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20622 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20623 %} 20624 ins_pipe( pipe_slow ); 20625 %} 20626 20627 instruct vcmpge8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 20628 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 20629 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 20630 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20631 match(Set dst (VectorMaskCmp src1 src2)); 20632 effect(TEMP scratch); 20633 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 20634 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8B" %} 20635 ins_encode %{ 20636 int vector_len = 0; 20637 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20638 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20639 %} 20640 ins_pipe( pipe_slow ); 20641 %} 20642 20643 instruct vcmpge16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 20644 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 20645 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 20646 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20647 match(Set dst (VectorMaskCmp src1 src2)); 20648 effect(TEMP scratch); 20649 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 20650 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed16B" %} 20651 ins_encode %{ 20652 int vector_len = 0; 20653 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20654 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20655 %} 20656 ins_pipe( pipe_slow ); 20657 %} 20658 20659 instruct extract8d(regD dst, vecZ src, vecZ tmp, immI idx) %{ 20660 predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 8); 20661 match(Set dst (ExtractD src idx)); 20662 effect(TEMP tmp); 20663 ins_encode %{ 20664 int vector_len = 2; 20665 int midx = 0x7 & $idx$$constant; 20666 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20667 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20668 } else if (midx == 1) { 20669 __ vpshufpd($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20670 } else if (midx > 1 && midx <= 7) { 20671 int extr_idx1 = midx / 2; 20672 int extr_idx2 = midx % 2; 20673 __ vextractf32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 20674 __ vpshufpd($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, extr_idx2, vector_len); 20675 } 20676 %} 20677 ins_pipe( pipe_slow ); 20678 %} 20679 20680 instruct extract4d(regD dst, vecY src, vecY tmp, immI idx) %{ 20681 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20682 match(Set dst (ExtractD src idx)); 20683 effect(TEMP tmp); 20684 ins_encode %{ 20685 int vector_len = 1; 20686 int midx = 0x3 & $idx$$constant; 20687 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20688 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20689 } else if (midx == 1) { 20690 __ vpshufpd($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20691 } else if (midx > 1 && midx <= 3) { 20692 __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20693 __ vpshufpd($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, midx - 2, vector_len); 20694 } 20695 20696 %} 20697 
ins_pipe( pipe_slow ); 20698 %} 20699 20700 instruct extract2d(regD dst, vecX src, immI idx) %{ 20701 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 2); 20702 match(Set dst (ExtractD src idx)); 20703 ins_encode %{ 20704 int vector_len = 0; 20705 int midx = 0x1 & $idx$$constant; 20706 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20707 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20708 } else if (midx >=1) { 20709 __ vpshufpd($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20710 } 20711 %} 20712 ins_pipe( pipe_slow ); 20713 %} 20714 20715 instruct extract1d(regD dst, vecD src, immI idx) %{ 20716 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 1); 20717 match(Set dst (ExtractD src idx)); 20718 ins_encode %{ 20719 int vector_len = 0; 20720 int midx = 0x1 & $idx$$constant; 20721 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20722 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20723 } 20724 %} 20725 ins_pipe( pipe_slow ); 20726 %} 20727 20728 instruct extract16f(regF dst, vecZ src, vecZ tmp, immI idx) %{ 20729 predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 16); 20730 match(Set dst (ExtractF src idx)); 20731 effect(TEMP tmp); 20732 ins_encode %{ 20733 int vector_len=2; 20734 int midx = 0xF & $idx$$constant; 20735 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20736 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20737 } else if (midx >= 1 && midx <= 3) { 20738 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20739 } else { 20740 int extr_idx1 = midx / 4; 20741 int extr_idx2 = midx % 4; 20742 __ vextractf32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 20743 __ vpshufps($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, extr_idx2, vector_len); 20744 } 20745 %} 20746 ins_pipe( pipe_slow ); 20747 %} 20748 20749 instruct extract8f(regF dst, vecY src, vecY tmp, immI idx) %{ 
20750 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8); 20751 match(Set dst (ExtractF src idx)); 20752 effect(TEMP tmp); 20753 ins_encode %{ 20754 int vector_len=1; 20755 int midx = 0x7 & $idx$$constant; 20756 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20757 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20758 } else if (midx >= 1 && midx <= 3) { 20759 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20760 } else if (midx >= 4) { 20761 __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20762 __ vpshufps($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, midx - 4, vector_len); 20763 } 20764 %} 20765 ins_pipe( pipe_slow ); 20766 %} 20767 20768 instruct extract4f(regF dst, vecX src, immI idx) %{ 20769 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20770 match(Set dst (ExtractF src idx)); 20771 ins_encode %{ 20772 int vector_len=0; 20773 int midx = 0x3 & $idx$$constant; 20774 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20775 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20776 } else if (midx >= 1 && midx <= 3) { 20777 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20778 } 20779 %} 20780 ins_pipe( pipe_slow ); 20781 %} 20782 20783 instruct extract2f(regF dst, vecD src, immI idx) %{ 20784 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 2); 20785 match(Set dst (ExtractF src idx)); 20786 ins_encode %{ 20787 int vector_len=0; 20788 int midx = 0x1 & $idx$$constant; 20789 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20790 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20791 } else { 20792 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20793 } 20794 %} 20795 ins_pipe( pipe_slow ); 20796 %} 20797 20798 instruct extract8l(rRegL dst, vecZ src, vecZ tmp, immI idx) %{ 20799 predicate(UseAVX > 2 && 
n->in(1)->bottom_type()->is_vect()->length() == 8); 20800 match(Set dst (ExtractL src idx)); 20801 effect(TEMP tmp); 20802 ins_encode %{ 20803 int midx = 0x7 & $idx$$constant; 20804 if (midx == 0) { 20805 __ movdl($dst$$Register, $src$$XMMRegister); 20806 } else if (midx >= 1 && midx <= 3) { 20807 __ pextrq($dst$$Register, $src$$XMMRegister, midx); 20808 } 20809 else if (midx >= 4 && midx <= 7) { 20810 __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20811 __ pextrq($dst$$Register, $tmp$$XMMRegister, midx-4); 20812 } 20813 %} 20814 ins_pipe( pipe_slow ); 20815 %} 20816 20817 instruct extract4l(rRegL dst, vecY src, immI idx) %{ 20818 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20819 match(Set dst (ExtractL src idx)); 20820 ins_encode %{ 20821 int midx = 0x3 & $idx$$constant; 20822 if (midx == 0) { 20823 __ movdl($dst$$Register, $src$$XMMRegister); 20824 } else if (midx >= 1 && midx <= 3) { 20825 __ pextrq($dst$$Register, $src$$XMMRegister, midx); 20826 } 20827 %} 20828 ins_pipe( pipe_slow ); 20829 %} 20830 20831 instruct extract2l(rRegL dst, vecX src, immI idx) %{ 20832 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 2); 20833 match(Set dst (ExtractL src idx)); 20834 ins_encode %{ 20835 int midx = 0x1 & $idx$$constant; 20836 if (midx == 0) { 20837 __ movdl($dst$$Register, $src$$XMMRegister); 20838 } else if (midx >= 1) { 20839 __ pextrq($dst$$Register, $src$$XMMRegister, midx); 20840 } 20841 %} 20842 ins_pipe( pipe_slow ); 20843 %} 20844 20845 instruct extract1l(rRegL dst, vecD src, immI idx) %{ 20846 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 1); 20847 match(Set dst (ExtractL src idx)); 20848 ins_encode %{ 20849 int midx = 0x1 & $idx$$constant; 20850 if (midx == 0) { 20851 __ movdl($dst$$Register, $src$$XMMRegister); 20852 } 20853 %} 20854 ins_pipe( pipe_slow ); 20855 %} 20856 20857 instruct extract16i(rRegI dst, vecZ src, vecZ tmp, immI idx) %{ 20858 predicate(UseAVX > 
2 && n->in(1)->bottom_type()->is_vect()->length() == 16); 20859 match(Set dst (ExtractI src idx)); 20860 effect(TEMP tmp); 20861 ins_encode %{ 20862 int midx = 0xF & $idx$$constant; 20863 if (midx == 0) { 20864 __ movdl($dst$$Register, $src$$XMMRegister); 20865 } 20866 else if (midx >= 1 && midx <= 3) { 20867 __ pextrd($dst$$Register, $src$$XMMRegister, midx); 20868 } 20869 else { 20870 // Using 4 because there are 4 ints in 128-bit 20871 int extr_idx1 = midx / 4; 20872 int extr_idx2 = midx % 4; 20873 __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 20874 __ pextrd($dst$$Register, $tmp$$XMMRegister, extr_idx2); 20875 } 20876 %} 20877 ins_pipe( pipe_slow ); 20878 %} 20879 20880 instruct extract8i(rRegI dst, vecY src, vecY tmp, immI idx) %{ 20881 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8); 20882 match(Set dst (ExtractI src idx)); 20883 effect(TEMP tmp); 20884 ins_encode %{ 20885 int midx = 0x7 & $idx$$constant; 20886 if (midx == 0) { 20887 __ movdl($dst$$Register, $src$$XMMRegister); 20888 } else if (midx >= 1 && midx <= 3) { 20889 __ pextrd($dst$$Register, $src$$XMMRegister, midx); 20890 } else if (midx >= 4) { 20891 __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20892 __ pextrd($dst$$Register, $tmp$$XMMRegister, midx - 4); 20893 } 20894 %} 20895 ins_pipe( pipe_slow ); 20896 %} 20897 20898 instruct extract4i(rRegI dst, vecX src, immI idx) %{ 20899 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20900 match(Set dst (ExtractI src idx)); 20901 ins_encode %{ 20902 int midx = 0x3 & $idx$$constant; 20903 if (midx == 0) { 20904 __ movdl($dst$$Register, $src$$XMMRegister); 20905 } else if (midx >= 1 && midx <= 3) { 20906 __ pextrd($dst$$Register, $src$$XMMRegister, midx); 20907 } 20908 %} 20909 ins_pipe( pipe_slow ); 20910 %} 20911 20912 instruct extract2i(rRegI dst, vecD src, immI idx) %{ 20913 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 2); 20914 
match(Set dst (ExtractI src idx)); 20915 ins_encode %{ 20916 int midx = 0x1 & $idx$$constant; 20917 if (midx == 0) { 20918 __ movdl($dst$$Register, $src$$XMMRegister); 20919 } else if (midx >= 1) { 20920 __ pextrd($dst$$Register, $src$$XMMRegister, midx); 20921 } 20922 %} 20923 ins_pipe( pipe_slow ); 20924 %} 20925 20926 instruct extract32s(rRegI dst, vecZ src, vecZ tmp, immI idx) %{ 20927 predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 32); 20928 match(Set dst (ExtractS src idx)); 20929 effect(TEMP tmp); 20930 ins_encode %{ 20931 int midx = 0x1F & $idx$$constant; 20932 if (midx == 0) { 20933 __ movdl($dst$$Register, $src$$XMMRegister); 20934 __ movswl($dst$$Register, $dst$$Register); 20935 } 20936 else if (midx >= 1 && midx <= 7) { 20937 __ pextrw($dst$$Register, $src$$XMMRegister, midx); 20938 __ movswl($dst$$Register, $dst$$Register); 20939 } 20940 else { 20941 int extr_idx1 = midx / 8; 20942 int extr_idx2 = midx % 8; 20943 __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 20944 __ pextrw($dst$$Register, $tmp$$XMMRegister, extr_idx2); 20945 __ movswl($dst$$Register, $dst$$Register); 20946 } 20947 %} 20948 ins_pipe( pipe_slow ); 20949 %} 20950 20951 instruct extract16s(rRegI dst, vecY src, vecY tmp, immI idx) %{ 20952 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 16); 20953 match(Set dst (ExtractS src idx)); 20954 effect(TEMP tmp); 20955 ins_encode %{ 20956 int midx = 0xF & $idx$$constant; 20957 if (midx == 0) { 20958 __ movdl($dst$$Register, $src$$XMMRegister); 20959 __ movswl($dst$$Register, $dst$$Register); 20960 } else if (midx >= 1 && midx <= 7) { 20961 __ pextrw($dst$$Register, $src$$XMMRegister, midx); 20962 __ movswl($dst$$Register, $dst$$Register); 20963 } 20964 else if (midx >= 8 && midx <= 15) { 20965 __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20966 __ pextrw($dst$$Register, $tmp$$XMMRegister, midx-8); 20967 __ movswl($dst$$Register, $dst$$Register); 20968 } 20969 %} 
20970 ins_pipe( pipe_slow ); 20971 %} 20972 20973 instruct extract8s(rRegI dst, vecX src, immI idx) %{ 20974 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8); 20975 match(Set dst (ExtractS src idx)); 20976 ins_encode %{ 20977 int midx = 0x7 & $idx$$constant; 20978 if (midx == 0) { 20979 __ movdl($dst$$Register, $src$$XMMRegister); 20980 __ movswl($dst$$Register, $dst$$Register); 20981 } else if (midx >= 1) { 20982 __ pextrw($dst$$Register, $src$$XMMRegister, midx); 20983 __ movswl($dst$$Register, $dst$$Register); 20984 } 20985 %} 20986 ins_pipe( pipe_slow ); 20987 %} 20988 20989 instruct extract4s(rRegI dst, vecD src, immI idx) %{ 20990 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20991 match(Set dst (ExtractS src idx)); 20992 ins_encode %{ 20993 int midx = 0x3 & $idx$$constant; 20994 if (midx == 0) { 20995 __ movdl($dst$$Register, $src$$XMMRegister); 20996 __ movswl($dst$$Register, $dst$$Register); 20997 } else if (midx >= 1) { 20998 __ pextrw($dst$$Register, $src$$XMMRegister, midx); 20999 __ movswl($dst$$Register, $dst$$Register); 21000 } 21001 %} 21002 ins_pipe( pipe_slow ); 21003 %} 21004 21005 instruct extract64b(rRegI dst, vecZ src, vecZ tmp, immI idx) %{ 21006 predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 64); 21007 match(Set dst (ExtractB src idx)); 21008 effect(TEMP tmp); 21009 ins_encode %{ 21010 int midx = 0x3F & $idx$$constant; 21011 if (midx == 0) { 21012 __ movdl($dst$$Register, $src$$XMMRegister); 21013 __ movsbl($dst$$Register, $dst$$Register); 21014 } 21015 else if (midx >= 1 && midx <= 15) { 21016 __ pextrb($dst$$Register, $src$$XMMRegister, midx); 21017 __ movsbl($dst$$Register, $dst$$Register); 21018 } 21019 else { 21020 int extr_idx1 = midx / 16; 21021 int extr_idx2 = midx % 16; 21022 __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 21023 __ pextrb($dst$$Register, $tmp$$XMMRegister, extr_idx2); 21024 __ movsbl($dst$$Register, $dst$$Register); 
21025 } 21026 %} 21027 ins_pipe( pipe_slow ); 21028 %} 21029 21030 instruct extract32b(rRegI dst, vecY src, vecY tmp, immI idx) %{ 21031 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 32); 21032 match(Set dst (ExtractB src idx)); 21033 effect(TEMP tmp); 21034 ins_encode %{ 21035 int midx = 0x1F & $idx$$constant; 21036 if (midx == 0) { 21037 __ movdl($dst$$Register, $src$$XMMRegister); 21038 __ movsbl($dst$$Register, $dst$$Register); 21039 } 21040 else if (midx >= 1 && midx <= 15) { 21041 __ pextrb($dst$$Register, $src$$XMMRegister, midx); 21042 __ movsbl($dst$$Register, $dst$$Register); 21043 } 21044 else { 21045 int extr_idx1 = midx / 16; 21046 int extr_idx2 = midx % 16; 21047 __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 21048 __ pextrb($dst$$Register, $tmp$$XMMRegister, extr_idx2); 21049 __ movsbl($dst$$Register, $dst$$Register); 21050 } 21051 %} 21052 ins_pipe( pipe_slow ); 21053 %} 21054 21055 instruct extract16b(rRegI dst, vecX src, immI idx) %{ 21056 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 16); 21057 match(Set dst (ExtractB src idx)); 21058 ins_encode %{ 21059 int midx = 0xF & $idx$$constant; 21060 if (midx == 0) { 21061 __ movdl($dst$$Register, $src$$XMMRegister); 21062 __ movsbl($dst$$Register, $dst$$Register); 21063 } else if (midx >= 1) { 21064 __ pextrb($dst$$Register, $src$$XMMRegister, midx); 21065 __ movsbl($dst$$Register, $dst$$Register); 21066 } 21067 %} 21068 ins_pipe( pipe_slow ); 21069 %} 21070 21071 instruct extract8b(rRegI dst, vecD src, immI idx) %{ 21072 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8); 21073 match(Set dst (ExtractB src idx)); 21074 ins_encode %{ 21075 int midx = 0x7 & $idx$$constant; 21076 if (midx == 0) { 21077 __ movdl($dst$$Register, $src$$XMMRegister); 21078 __ movsbl($dst$$Register, $dst$$Register); 21079 } else if (midx >= 1) { 21080 __ pextrb($dst$$Register, $src$$XMMRegister, midx); 21081 __ movsbl($dst$$Register, 
$dst$$Register); 21082 } 21083 %} 21084 ins_pipe( pipe_slow ); 21085 %} 21086 21087 instruct vcmpge32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 21088 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 21089 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 21090 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21091 match(Set dst (VectorMaskCmp src1 src2)); 21092 effect(TEMP scratch); 21093 format %{ "vpcmpgtb $dst,$src2,$src1\n " 21094 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed32B" %} 21095 ins_encode %{ 21096 int vector_len = 1; 21097 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 21098 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21099 %} 21100 ins_pipe( pipe_slow ); 21101 %} 21102 21103 instruct vcmpge64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21104 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 21105 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 21106 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21107 match(Set dst (VectorMaskCmp src1 src2)); 21108 effect(TEMP dst, TEMP scratch); 21109 format %{ "vpcmpnltb k2,$src1,$src2\n\t" 21110 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed64B" %} 21111 ins_encode %{ 21112 int vector_len = 2; 21113 Assembler::ComparisonPredicate cmp = Assembler::nlt; 21114 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21115 KRegister mask = k0; // The comparison itself is not being masked. 
21116 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21117 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21118 %} 21119 ins_pipe( pipe_slow ); 21120 %} 21121 21122 instruct vcmple8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 21123 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 21124 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 21125 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21126 match(Set dst (VectorMaskCmp src1 src2)); 21127 effect(TEMP scratch); 21128 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 21129 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8B" %} 21130 ins_encode %{ 21131 int vector_len = 0; 21132 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21133 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21134 %} 21135 ins_pipe( pipe_slow ); 21136 %} 21137 21138 instruct vcmple16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 21139 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 21140 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 21141 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21142 match(Set dst (VectorMaskCmp src1 src2)); 21143 effect(TEMP scratch); 21144 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 21145 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed16B" %} 21146 ins_encode %{ 21147 int vector_len = 0; 21148 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21149 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21150 %} 21151 ins_pipe( pipe_slow ); 21152 %} 21153 21154 instruct vcmple32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 21155 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 21156 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 21157 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21158 match(Set dst (VectorMaskCmp src1 src2)); 21159 effect(TEMP scratch); 21160 format %{ "vpcmpgtb $dst,$src1,$src2\n\t" 21161 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed32B" %} 21162 ins_encode %{ 21163 int vector_len = 1; 21164 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21165 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21166 %} 21167 ins_pipe( pipe_slow ); 21168 %} 21169 21170 instruct vcmple64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21171 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 21172 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 21173 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21174 match(Set dst (VectorMaskCmp src1 src2)); 21175 effect(TEMP dst, TEMP scratch); 21176 format %{ "vpcmpleb k2,$src1,$src2\n\t" 21177 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed64B" %} 21178 ins_encode %{ 21179 int vector_len = 2; 21180 Assembler::ComparisonPredicate cmp = Assembler::le; 21181 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21182 KRegister mask = k0; // The comparison itself is not being masked. 
21183 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21184 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21185 %} 21186 ins_pipe( pipe_slow ); 21187 %} 21188 21189 instruct vcmpne8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 21190 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 21191 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 21192 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21193 match(Set dst (VectorMaskCmp src1 src2)); 21194 effect(TEMP scratch); 21195 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 21196 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8B" %} 21197 ins_encode %{ 21198 int vector_len = 0; 21199 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21200 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21201 %} 21202 ins_pipe( pipe_slow ); 21203 %} 21204 21205 instruct vcmpne16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 21206 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 21207 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 21208 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21209 match(Set dst (VectorMaskCmp src1 src2)); 21210 effect(TEMP scratch); 21211 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 21212 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed16B" %} 21213 ins_encode %{ 21214 int vector_len = 0; 21215 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21216 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21217 %} 21218 ins_pipe( pipe_slow ); 21219 %} 21220 21221 instruct vcmpne32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 21222 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 21223 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 21224 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21225 match(Set dst (VectorMaskCmp src1 src2)); 21226 effect(TEMP scratch); 21227 format %{ "vpcmpeqb $dst,$src1,$src2\n\t" 21228 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed32B" %} 21229 ins_encode %{ 21230 int vector_len = 1; 21231 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21232 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21233 %} 21234 ins_pipe( pipe_slow ); 21235 %} 21236 21237 instruct vcmpne64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21238 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 21239 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 21240 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 21241 match(Set dst (VectorMaskCmp src1 src2)); 21242 effect(TEMP dst, TEMP scratch); 21243 format %{ "vpcmpneqb k2,$src1,$src2\n\t" 21244 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed64B" %} 21245 ins_encode %{ 21246 int vector_len = 2; 21247 Assembler::ComparisonPredicate cmp = Assembler::neq; 21248 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21249 KRegister mask = k0; // The comparison itself is not being masked. 
21250 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21251 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21252 %} 21253 ins_pipe( pipe_slow ); 21254 %} 21255 21256 instruct vcmpeq4S(vecD dst, vecD src1, vecD src2) %{ 21257 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 21258 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 21259 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21260 match(Set dst (VectorMaskCmp src1 src2)); 21261 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed4S" %} 21262 ins_encode %{ 21263 int vector_len = 0; 21264 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21265 %} 21266 ins_pipe( pipe_slow ); 21267 %} 21268 21269 instruct vcmpeq8S(vecX dst, vecX src1, vecX src2) %{ 21270 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 21271 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 21272 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21273 match(Set dst (VectorMaskCmp src1 src2)); 21274 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed8S" %} 21275 ins_encode %{ 21276 int vector_len = 0; 21277 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21278 %} 21279 ins_pipe( pipe_slow ); 21280 %} 21281 21282 instruct vcmpeq16S(vecY dst, vecY src1, vecY src2) %{ 21283 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 21284 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 21285 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21286 match(Set dst (VectorMaskCmp src1 src2)); 21287 format %{ "vpcmpeqw $dst,$src1,$src2\n\t! 
cmpeq packed16S" %} 21288 ins_encode %{ 21289 int vector_len = 1; 21290 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21291 %} 21292 ins_pipe( pipe_slow ); 21293 %} 21294 21295 instruct vcmpeq32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21296 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 21297 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 21298 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21299 match(Set dst (VectorMaskCmp src1 src2)); 21300 effect(TEMP dst, TEMP scratch); 21301 format %{ "vpcmpeqw k2,$src1,$src2\n\t" 21302 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed32S" %} 21303 ins_encode %{ 21304 int vector_len = 2; 21305 Assembler::ComparisonPredicate cmp = Assembler::eq; 21306 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21307 KRegister mask = k0; // The comparison itself is not being masked. 21308 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21309 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21310 %} 21311 ins_pipe( pipe_slow ); 21312 %} 21313 21314 instruct vcmplt4S(vecD dst, vecD src1, vecD src2) %{ 21315 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 21316 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 21317 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21318 match(Set dst (VectorMaskCmp src1 src2)); 21319 format %{ "vpcmpgtw $dst,$src2,$src1\t! 
cmplt packed4S" %} 21320 ins_encode %{ 21321 int vector_len = 0; 21322 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 21323 %} 21324 ins_pipe( pipe_slow ); 21325 %} 21326 21327 instruct vcmplt8S(vecX dst, vecX src1, vecX src2) %{ 21328 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 21329 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 21330 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21331 match(Set dst (VectorMaskCmp src1 src2)); 21332 format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed8S" %} 21333 ins_encode %{ 21334 int vector_len = 0; 21335 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 21336 %} 21337 ins_pipe( pipe_slow ); 21338 %} 21339 21340 instruct vcmplt16S(vecY dst, vecY src1, vecY src2) %{ 21341 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 21342 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 21343 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21344 match(Set dst (VectorMaskCmp src1 src2)); 21345 format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed16S" %} 21346 ins_encode %{ 21347 int vector_len = 1; 21348 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 21349 %} 21350 ins_pipe( pipe_slow ); 21351 %} 21352 21353 instruct vcmplt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21354 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 21355 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 21356 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21357 match(Set dst (VectorMaskCmp src1 src2)); 21358 effect(TEMP dst, TEMP scratch); 21359 format %{ "vpcmpnlew k2,$src1,$src2\n\t" 21360 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpgt packed32S" %} 21361 ins_encode %{ 21362 int vector_len = 2; 21363 Assembler::ComparisonPredicate cmp = Assembler::lt; 21364 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21365 KRegister mask = k0; // The comparison itself is not being masked. 21366 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21367 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21368 %} 21369 ins_pipe( pipe_slow ); 21370 %} 21371 21372 instruct vcmpgt4S(vecD dst, vecD src1, vecD src2) %{ 21373 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 21374 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 21375 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21376 match(Set dst (VectorMaskCmp src1 src2)); 21377 format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed4S" %} 21378 ins_encode %{ 21379 int vector_len = 0; 21380 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21381 %} 21382 ins_pipe( pipe_slow ); 21383 %} 21384 21385 instruct vcmpgt8S(vecX dst, vecX src1, vecX src2) %{ 21386 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 21387 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 21388 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21389 match(Set dst (VectorMaskCmp src1 src2)); 21390 format %{ "vpcmpgtw $dst,$src1,$src2\t! 
cmpgt packed8S" %} 21391 ins_encode %{ 21392 int vector_len = 0; 21393 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21394 %} 21395 ins_pipe( pipe_slow ); 21396 %} 21397 21398 instruct vcmpgt16S(vecY dst, vecY src1, vecY src2) %{ 21399 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 21400 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 21401 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21402 match(Set dst (VectorMaskCmp src1 src2)); 21403 format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed16S" %} 21404 ins_encode %{ 21405 int vector_len = 1; 21406 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21407 %} 21408 ins_pipe( pipe_slow ); 21409 %} 21410 21411 instruct vcmpgt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21412 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 21413 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 21414 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21415 match(Set dst (VectorMaskCmp src1 src2)); 21416 effect(TEMP dst, TEMP scratch); 21417 format %{ "vpcmpnlew k2,$src1,$src2\n\t" 21418 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed32S" %} 21419 ins_encode %{ 21420 int vector_len = 2; 21421 Assembler::ComparisonPredicate cmp = Assembler::nle; 21422 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21423 KRegister mask = k0; // The comparison itself is not being masked. 
21424 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21425 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21426 %} 21427 ins_pipe( pipe_slow ); 21428 %} 21429 21430 instruct vcmpge4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 21431 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 21432 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 21433 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21434 match(Set dst (VectorMaskCmp src1 src2)); 21435 effect(TEMP scratch); 21436 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 21437 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4S" %} 21438 ins_encode %{ 21439 int vector_len = 0; 21440 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 21441 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21442 %} 21443 ins_pipe( pipe_slow ); 21444 %} 21445 21446 instruct vcmpge8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 21447 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 21448 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 21449 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21450 match(Set dst (VectorMaskCmp src1 src2)); 21451 effect(TEMP scratch); 21452 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 21453 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed8S" %} 21454 ins_encode %{ 21455 int vector_len = 0; 21456 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 21457 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21458 %} 21459 ins_pipe( pipe_slow ); 21460 %} 21461 21462 instruct vcmpge16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 21463 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 21464 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 21465 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21466 match(Set dst (VectorMaskCmp src1 src2)); 21467 effect(TEMP scratch); 21468 format %{ "vpcmpgtw $dst,$src2,$src1\n\t" 21469 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16S" %} 21470 ins_encode %{ 21471 int vector_len = 1; 21472 __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 21473 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21474 %} 21475 ins_pipe( pipe_slow ); 21476 %} 21477 21478 instruct vcmpge32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21479 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 21480 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 21481 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21482 match(Set dst (VectorMaskCmp src1 src2)); 21483 effect(TEMP dst, TEMP scratch); 21484 format %{ "vpcmpnltw k2,$src1,$src2\n\t" 21485 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed32S" %} 21486 ins_encode %{ 21487 int vector_len = 2; 21488 Assembler::ComparisonPredicate cmp = Assembler::nlt; 21489 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21490 KRegister mask = k0; // The comparison itself is not being masked. 
21491 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21492 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21493 %} 21494 ins_pipe( pipe_slow ); 21495 %} 21496 21497 instruct vcmple4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 21498 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 21499 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 21500 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21501 match(Set dst (VectorMaskCmp src1 src2)); 21502 effect(TEMP scratch); 21503 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 21504 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4S" %} 21505 ins_encode %{ 21506 int vector_len = 0; 21507 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21508 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21509 %} 21510 ins_pipe( pipe_slow ); 21511 %} 21512 21513 instruct vcmple8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 21514 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 21515 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 21516 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21517 match(Set dst (VectorMaskCmp src1 src2)); 21518 effect(TEMP scratch); 21519 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 21520 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmple packed8S" %} 21521 ins_encode %{ 21522 int vector_len = 0; 21523 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21524 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21525 %} 21526 ins_pipe( pipe_slow ); 21527 %} 21528 21529 instruct vcmple16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 21530 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 21531 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 21532 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21533 match(Set dst (VectorMaskCmp src1 src2)); 21534 effect(TEMP scratch); 21535 format %{ "vpcmpgtw $dst,$src1,$src2\n\t" 21536 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16S" %} 21537 ins_encode %{ 21538 int vector_len = 1; 21539 __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21540 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21541 %} 21542 ins_pipe( pipe_slow ); 21543 %} 21544 21545 instruct vcmple32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21546 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 21547 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 21548 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21549 match(Set dst (VectorMaskCmp src1 src2)); 21550 effect(TEMP dst, TEMP scratch); 21551 format %{ "vpcmplew k2,$src1,$src2\n\t" 21552 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed32S" %} 21553 ins_encode %{ 21554 int vector_len = 2; 21555 Assembler::ComparisonPredicate cmp = Assembler::le; 21556 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21557 KRegister mask = k0; // The comparison itself is not being masked. 
21558 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21559 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21560 %} 21561 ins_pipe( pipe_slow ); 21562 %} 21563 21564 instruct vcmpne4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 21565 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 21566 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 21567 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21568 match(Set dst (VectorMaskCmp src1 src2)); 21569 effect(TEMP scratch); 21570 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 21571 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4S" %} 21572 ins_encode %{ 21573 int vector_len = 0; 21574 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21575 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21576 %} 21577 ins_pipe( pipe_slow ); 21578 %} 21579 21580 instruct vcmpne8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 21581 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 21582 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 21583 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21584 match(Set dst (VectorMaskCmp src1 src2)); 21585 effect(TEMP scratch); 21586 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 21587 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed8S" %} 21588 ins_encode %{ 21589 int vector_len = 0; 21590 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21591 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21592 %} 21593 ins_pipe( pipe_slow ); 21594 %} 21595 21596 instruct vcmpne16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 21597 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && 21598 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 21599 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21600 match(Set dst (VectorMaskCmp src1 src2)); 21601 effect(TEMP scratch); 21602 format %{ "vpcmpeqw $dst,$src1,$src2\n\t" 21603 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16S" %} 21604 ins_encode %{ 21605 int vector_len = 1; 21606 __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21607 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 21608 %} 21609 ins_pipe( pipe_slow ); 21610 %} 21611 21612 instruct vcmpne32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 21613 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && 21614 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 21615 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 21616 match(Set dst (VectorMaskCmp src1 src2)); 21617 effect(TEMP dst, TEMP scratch); 21618 format %{ "vpcmpneqw k2,$src1,$src2\n\t" 21619 "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed32S" %} 21620 ins_encode %{ 21621 int vector_len = 2; 21622 Assembler::ComparisonPredicate cmp = Assembler::neq; 21623 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 21624 KRegister mask = k0; // The comparison itself is not being masked. 
21625 __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 21626 __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 21627 %} 21628 ins_pipe( pipe_slow ); 21629 %} 21630 21631 instruct vcmpeq1L(vecD dst, vecD src1, vecD src2) %{ 21632 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 21633 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 21634 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 21635 match(Set dst (VectorMaskCmp src1 src2)); 21636 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed1L" %} 21637 ins_encode %{ 21638 int vector_len = 0; 21639 __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21640 %} 21641 ins_pipe( pipe_slow ); 21642 %} 21643 21644 instruct vcmpeq2L(vecX dst, vecX src1, vecX src2) %{ 21645 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 21646 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 21647 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 21648 match(Set dst (VectorMaskCmp src1 src2)); 21649 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed2L" %} 21650 ins_encode %{ 21651 int vector_len = 0; 21652 __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 21653 %} 21654 ins_pipe( pipe_slow ); 21655 %} 21656 21657 instruct vcmpeq4L(vecY dst, vecY src1, vecY src2) %{ 21658 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 21659 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 21660 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 21661 match(Set dst (VectorMaskCmp src1 src2)); 21662 format %{ "vpcmpeqq $dst,$src1,$src2\n\t! 
cmpeq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit long compare-equal. The EVEX compare writes a k-mask register;
// the zeroing masked load from the all-bits-set constant then expands the
// mask into all-ones / all-zeros lanes in $dst.
instruct vcmpeq8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Long compare less-than (64/128/256-bit): emitted as greater-than with the
// operands swapped ($src2 > $src1).
instruct vcmplt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit long compare less-than via EVEX k-mask (Assembler::lt).
instruct vcmplt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Long compare greater-than (64/128/256-bit): direct vpcmpgtq.
instruct vcmpgt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit long compare greater-than: gt is encoded as not-less-or-equal (nle).
instruct vcmpgt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Long compare greater-or-equal: ge == !(src2 > src1), synthesized as a
// swapped vpcmpgtq followed by an xor with all-ones ($scratch addresses the
// all-bits-set constant).
instruct vcmpge1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit long compare greater-or-equal: ge is encoded as not-less-than (nlt).
instruct vcmpge8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Long compare less-or-equal: le == !(src1 > src2) — vpcmpgtq then an xor
// with the all-ones constant inverts the lane mask.
instruct vcmple1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit long compare less-or-equal via EVEX k-mask (Assembler::le).
instruct vcmple8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Long compare not-equal: ne == !(src1 == src2) — vpcmpeqq then xor with
// the all-ones constant inverts the lane mask.
instruct vcmpne1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit long compare not-equal via EVEX k-mask (Assembler::neq).
instruct vcmpne8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// SSE4.1 float blend: blendvps implicitly uses xmm0 as the mask register,
// hence the TEMP xmm_0 effect and the copy of $mask into xmm0 when needed.
instruct blendvps2F(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "blendvps $dst,$src,$mask\t! 
packed2F" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps2F(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvps4F(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "blendvps $dst,$src,$mask\t! packed4F" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ blendvps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps4F(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvps8F(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit blends: the vector mask is first converted to a k-mask by
// comparing against the all-bits-set constant, then a masked blend selects
// between $src1 and $src2.
instruct vblendvps16F(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t"
            "vblendmps $dst,k2,$src1,$src2\t! blend packed16F " %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpeqd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
    __ evblendmps($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd8D(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vblendmpd $dst,k2,$src1,$src2\t! blend packed8D " %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evblendmpd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Byte/short 512-bit blends additionally require AVX-512BW.
instruct vpblendmb64B(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqb k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmb $dst,k2,$src1,$src2\t! blend packed64B " %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpb(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmb($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmw32S(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT && VM_Version::supports_avx512bw());
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqw k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmw $dst,k2,$src1,$src2\t! blend packed32S " %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpw(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmw($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmd16I(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmd $dst,k2,$src1,$src2\t! blend packed16I " %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    __ evpcmpd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendmq8L(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  effect(TEMP scratch);
  format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t"
            "vpblendmq $dst,k2,$src1,$src2\t! blend packed8L " %}
  ins_encode %{
    int vector_len = 2;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
__ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register);
    __ evpblendmq($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len);
  %}
  ins_pipe( pipe_slow );
%}


// SSE4.1 byte-granularity blends: pblendvb implicitly uses xmm0 as the mask
// register, hence the TEMP xmm_0 effect and the copy of $mask into xmm0.
instruct pblendvb2I(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed2I" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb2I(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4I(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed4I" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4I(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit vpblendvb requires AVX2.
instruct vpblendvb8I(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb8B(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed8B" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8B(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb16B(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed16B" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb16B(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb32B(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb4S(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed4S" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4S(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb8S(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed8S" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb8S(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb16S(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! 
blend packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long-element blends reuse the byte-granularity pblendvb/vpblendvb since
// the mask lanes are all-ones / all-zeros.
instruct pblendvb1L(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed1L" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb1L(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct pblendvb2L(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed2L" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb2L(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpblendvb4L(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// SSE4.1 double blend: blendvpd implicitly uses xmm0 as the mask register.
instruct blendvpd1D(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "blendvpd $dst,$src,$mask\t! packed1D" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd1D(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed1D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct blendvpd2D(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "blendvpd $dst,$src,$mask\t! packed2D" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd2D(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpd4D(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- NEG --------------------------------------
// a = -a : negation is computed as 0 - a (pxor zeroes $dst, then psubd).
instruct vneg2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 2);
  match(Set dst (NegVI src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubd $dst, $src\t! neg packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 4);
  match(Set dst (NegVI src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubd $dst, $src\t! 
neg packed4I" %} 22485 ins_cost(150); 22486 ins_encode %{ 22487 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 22488 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 22489 %} 22490 ins_pipe( pipe_slow ); 22491 %} 22492 22493 instruct vneg8I_reg(vecY dst, vecY src, vecY tmp) %{ 22494 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 22495 match(Set dst (NegVI src)); 22496 effect(TEMP tmp); 22497 format %{ "vpxor $tmp,$tmp,$tmp\n\t" 22498 "vpsubd $dst,$tmp,$src\t! neg packed8I" %} 22499 ins_cost(150); 22500 ins_encode %{ 22501 int vector_len = 1; 22502 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 22503 __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 22504 %} 22505 ins_pipe( pipe_slow ); 22506 %} 22507 22508 instruct vneg16I_reg(vecZ dst, vecZ src, vecZ tmp) %{ 22509 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 22510 match(Set dst (NegVI src)); 22511 effect(TEMP tmp); 22512 format %{ "vpxor $tmp,$tmp,$tmp\n\t" 22513 "vpsubd $dst,$tmp,$src\t! 
neg packed16I" %} 22514 ins_cost(150); 22515 ins_encode %{ 22516 int vector_len = 2; 22517 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 22518 __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len); 22519 %} 22520 ins_pipe( pipe_slow ); 22521 %} 22522 22523 instruct vneg1D(regD dst) %{ 22524 predicate((UseSSE>=2) && (UseAVX == 0)); 22525 match(Set dst (NegVD dst)); 22526 ins_cost(150); 22527 format %{ "xorpd $dst,[0x8000000000000000] \t# $dst = -$dst neg packed1D" %} 22528 ins_encode %{ 22529 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 22530 %} 22531 ins_pipe(pipe_slow); 22532 %} 22533 22534 instruct vneg1D_reg(vecX dst, vecX src) %{ 22535 predicate(UseAVX > 0 && n->as_Vector()->length() == 1); 22536 match(Set dst (NegVD src)); 22537 format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed1D" %} 22538 ins_cost(150); 22539 ins_encode %{ 22540 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 22541 ExternalAddress(double_signflip())); 22542 %} 22543 ins_pipe( pipe_slow ); 22544 %} 22545 22546 instruct vneg2D_reg(vecX dst) %{ 22547 predicate((UseSSE>=2)); 22548 match(Set dst (NegVD dst)); 22549 ins_cost(150); 22550 format %{ "xorpd $dst,[0x8000000000000000]\t# $dst = -$dst neg packed2D" %} 22551 ins_encode %{ 22552 __ xorpd($dst$$XMMRegister, ExternalAddress(vector_double_signflip())); 22553 %} 22554 ins_pipe(pipe_slow); 22555 %} 22556 22557 22558 instruct vneg4D_reg(vecY dst, vecY src) %{ 22559 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 22560 match(Set dst (NegVD src)); 22561 format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed4D" %} 22562 ins_cost(150); 22563 ins_encode %{ 22564 int vector_len = 1; 22565 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len); 22566 %} 22567 ins_pipe( pipe_slow ); 22568 %} 22569 22570 instruct vneg8D_reg(vecZ dst, vecZ src) %{ 22571 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 22572 
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Float negation: xor with the 32-bit sign-flip constant. SSE forms are
// in-place (dst == src); AVX forms are three-operand.
instruct vneg2F_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (NegVF dst));
  format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed2F" %}
  ins_cost(150);
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (NegVF dst));
  format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (NegVF src));
  format %{ "vxorps $dst,$src\t# $dst = -$src neg packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (NegVF src));
  format %{ "vxorps $dst,$src\t# $dst = -$src neg packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
// Integer abs uses SSSE3 pabs*/AVX vpabs*/AVX-512 evpabs*; FP abs clears the
// sign bit by AND-ing with a sign-mask constant in memory.

instruct vabs8B_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %}
  ins_cost(150);
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16B_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %}
  ins_cost(150);
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32B_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs64B_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "evpabsb $dst,$src\t# $dst = |$src| abs packed64B" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;   // 512-bit (EVEX) encoding
    __ evpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4S_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %}
  ins_cost(150);
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8S_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %}
  ins_cost(150);
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16S_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32S_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "evpabsw $dst,$src\t# $dst = |$src| abs packed32S" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;   // 512-bit (EVEX) encoding
    __ evpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8I_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16I_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "evpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;   // 512-bit (EVEX) encoding
    __ evpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long abs exists only as an AVX-512 instruction (vpabsq), hence UseAVX > 2
// even for the 128-bit form.
instruct vabs2L_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsV src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;   // 128-bit encoding
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4L_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsV src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8L_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsV src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// FP abs: AND with sign-mask constant; SSE forms are in-place.
instruct vabs1D_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 1);
  match(Set dst (AbsVD dst));
  format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed1D" %}
  ins_cost(150);
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2D_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVD dst));
  format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed2D" %}
  ins_cost(150);
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVD src));
  format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVD src));
  format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2F_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVF dst));
  format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed2F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  // NOTE(review): format says vandps but the encoding emits SSE andps —
  // display-only inconsistency, confirm against upstream.
  format %{ "vandps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- NOT --------------------------------------------
// Bitwise NOT is synthesized as (src XOR all-ones); the all-ones constant is
// loaded from memory (vector_all_bits_set). AVX forms need a scratch GPR for
// the ExternalAddress-based vpxor.

instruct vnot4B(vecS dst, vecS src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP dst);   // dst holds the all-ones constant before src is read
  format %{ "pxor $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot4B_reg(vecS dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;   // 128-bit encoding
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B_reg(vecD dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;   // 128-bit encoding
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B_reg(vecX dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;   // 128-bit encoding
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot32B_reg(vecY dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot64B_reg(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ---- VectorTest: vptest + setcc, selected on the node's condition ----
// The flag chosen by setb (carrySet / notZero) is materialized as 0/1 in a
// GPR via setb + movzbl.

instruct vptest4inae(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2 ));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 0;   // 128-bit encoding
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2 ));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 0;   // 128-bit encoding
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2 ));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2 ));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ---- VectorLoadMask: turn a 0/1 byte mask into all-zeros/all-ones lanes ----
// Pattern: zero a register, subtract the mask (0 - 1 == 0xFF..), then
// sign-extend to the target lane width where needed.

instruct loadmask8b(vecD dst, vecD src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (8B to 8B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16b(vecX dst, vecX src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  // NOTE(review): format says vpxor/vpsubb but the encoding emits SSE
  // pxor/psubb — display-only inconsistency, confirm against upstream.
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (16B to 16B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32b(vecY dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;   // 256-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask64b(vecZ dst, vecZ src) %{
  predicate(UseAVX > 0 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding (byte ops need AVX512BW)
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Short lanes: negate the byte mask, then sign-extend bytes to words.
instruct loadmask4s(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (4B to 4S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8s(vecX dst, vecD src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (8B to 8S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16s(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (16B to 16S)" %}
  ins_encode %{
    int vector_len = 1;   // byte ops at 128-bit (0); widening extend at 256-bit
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32s(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (32B to 32S)" %}
  ins_encode %{
    int vector_len = 2;   // byte ops at 256-bit (1); widening extend at 512-bit
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 1);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Int/float lanes: negate the byte mask, then sign-extend bytes to dwords.
instruct loadmask2i(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (2B to 2I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4i(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (4B to 4I)" %}
  ins_encode %{
    int vector_len = 0;   // NOTE(review): unused in this SSE-only encoding
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8i(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbd $dst\t! load mask (8B to 8I)" %}
  ins_encode %{
    int vector_len = 1;   // byte ops at 128-bit (0); widening extend at 256-bit
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit form widens first (zero-extend bytes to dwords) into tmp, then
// negates at dword width — avoids AVX512BW byte ops.
instruct loadmask16i(vecZ dst, vecX src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbd $tmp,$src\n\t"
            "vpsubd $dst,$tmp\t! load mask (16B to 16I)" %}
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbd($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long/double lanes: negate the byte mask, then sign-extend bytes to qwords.
instruct loadmask1l(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 1 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (1B to 1L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2l(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (2B to 2L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4l(vecY dst, vecS src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbq $dst\t! load mask (4B to 4L)" %}
  ins_encode %{
    int vector_len = 1;   // byte ops at 128-bit (0); widening extend at 256-bit
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8l(vecZ dst, vecD src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbq $tmp,$src\n\t"
            "vpsubq $dst,$tmp\t! load mask (8B to 8L)" %}
  ins_encode %{
    int vector_len = 2;   // 512-bit encoding
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbq($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---- VectorStoreMask: collapse all-ones/all-zeros lanes back to 0/1 bytes ----
// For byte-sized input masks, vpabsb maps 0xFF -> 1 and 0 -> 0 directly.

instruct storemask8b(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (8B to 8B)" %}
  ins_encode %{
    int vector_len = 0;   // 128-bit encoding
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16b(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (16B to 16B)" %}
  ins_encode %{
    int vector_len = 0;   // 128-bit encoding
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask32b(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t!
store mask (32B to 32B)" %} 23332 ins_encode %{ 23333 int vector_len = 1; 23334 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23335 %} 23336 ins_pipe( pipe_slow ); 23337 %} 23338 23339 instruct storemask64b(vecZ dst, vecZ src, rRegL scratch) %{ 23340 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1); 23341 match(Set dst (VectorStoreMask src)); 23342 effect(TEMP scratch); 23343 format %{ "vpcmpeqb k2,$src,0xFFFFFFFF\n\t" 23344 "vmovdqub $dst,k2,0x01010101\t! store mask (64B to 64B)" %} 23345 ins_encode %{ 23346 int vector_len = 2; 23347 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 23348 Assembler::ComparisonPredicate cp = Assembler::eq; 23349 __ evpcmpb(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 23350 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, vector_len, $scratch$$Register); 23351 %} 23352 ins_pipe( pipe_slow ); 23353 %} 23354 23355 instruct storemask4s(vecS dst, vecD src) %{ 23356 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 23357 match(Set dst (VectorStoreMask src)); 23358 format %{ "vpabsw $dst,$src\n\t" 23359 "vpackuswb $dst,$dst,$dst\t! store mask (4S to 4B)" %} 23360 ins_encode %{ 23361 int vector_len = 0; 23362 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23363 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23364 %} 23365 ins_pipe( pipe_slow ); 23366 %} 23367 23368 instruct storemask8s(vecD dst, vecX src) %{ 23369 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 23370 match(Set dst (VectorStoreMask src)); 23371 format %{ "vpabsw $dst,$src\n\t" 23372 "vpackuswb $dst,$dst,$dst\t! 
store mask (8S to 8B)" %} 23373 ins_encode %{ 23374 int vector_len = 0; 23375 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23376 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23377 %} 23378 ins_pipe( pipe_slow ); 23379 %} 23380 23381 instruct storemask16s(vecX dst, vecY src, vecY tmp) %{ 23382 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 23383 match(Set dst (VectorStoreMask src)); 23384 effect(TEMP dst, TEMP tmp); 23385 format %{ "vpabsw $dst,$src\n\t" 23386 "vextracti128 $tmp,$dst\n\t" 23387 "vpackuswb $dst,$dst,$tmp\t! store mask (16S to 16B)" %} 23388 ins_encode %{ 23389 int vector_len = 1; 23390 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23391 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 23392 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 23393 %} 23394 ins_pipe( pipe_slow ); 23395 %} 23396 23397 instruct storemask32s(vecY dst, vecZ src, rRegL scratch) %{ 23398 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 23399 match(Set dst (VectorStoreMask src)); 23400 effect(TEMP scratch); 23401 format %{ "vpcmpeqw k2,$src,0xFFFFFFFF\n\t" 23402 "vmovdqub $dst,k2,0x01010101\t! store mask (32S to 32B)" %} 23403 ins_encode %{ 23404 int vector_len = 2; 23405 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 23406 Assembler::ComparisonPredicate cp = Assembler::eq; 23407 __ evpcmpw(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 23408 // The dst is 256-bit - thus we can do a smaller move. 
23409 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 1, $scratch$$Register); 23410 %} 23411 ins_pipe( pipe_slow ); 23412 %} 23413 23414 23415 instruct storemask2i(vecS dst, vecD src) %{ 23416 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 23417 match(Set dst (VectorStoreMask src)); 23418 format %{ "vpabsd $dst,$src\n\t" 23419 "vpackusdw $dst,$dst,$dst\n\t" 23420 "vpackuswb $dst,$dst,$dst\t! store mask (2I to 2B)" %} 23421 ins_encode %{ 23422 int vector_len = 0; 23423 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23424 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23425 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23426 %} 23427 ins_pipe( pipe_slow ); 23428 %} 23429 23430 instruct storemask4i(vecS dst, vecX src) %{ 23431 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 23432 match(Set dst (VectorStoreMask src)); 23433 format %{ "vpabsd $dst,$src\n\t" 23434 "vpackusdw $dst,$dst,$dst\n\t" 23435 "vpackuswb $dst,$dst,$dst\t! 
store mask (4I to 4B)" %} 23436 ins_encode %{ 23437 int vector_len = 0; 23438 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23439 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23440 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23441 %} 23442 ins_pipe( pipe_slow ); 23443 %} 23444 23445 instruct storemask8i(vecD dst, vecY src, vecY tmp) %{ 23446 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 23447 match(Set dst (VectorStoreMask src)); 23448 effect(TEMP dst, TEMP tmp); 23449 format %{ "vpxor $dst,$dst\n\t" 23450 "vpsubd $dst,$src\n\t" 23451 "vextracti128 $tmp,$dst\n\t" 23452 "vpackusdw $dst,$dst,$tmp\n\t" 23453 "vpackuswb $dst,$dst,$dst\t! store mask (8I to 8B)" %} 23454 ins_encode %{ 23455 int vector_len = 1; 23456 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23457 __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len); 23458 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 23459 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 23460 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 23461 %} 23462 ins_pipe( pipe_slow ); 23463 %} 23464 23465 instruct storemask16i(vecX dst, vecZ src, rRegL scratch) %{ 23466 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 23467 match(Set dst (VectorStoreMask src)); 23468 effect(TEMP scratch); 23469 format %{ "vpcmpeqd k2,$src,0xFFFFFFFF\n\t" 23470 "vmovdqub $dst,k2,0x01010101\t! store mask (16I to 16B)" %} 23471 ins_encode %{ 23472 int vector_len = 2; 23473 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
23474 __ evpcmpeqd(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 23475 // The dst is only 128-bit - thus we can do a smaller move. 23476 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 0, $scratch$$Register); 23477 %} 23478 ins_pipe( pipe_slow ); 23479 %} 23480 23481 instruct storemask1l(vecS dst, vecD src) %{ 23482 predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 23483 match(Set dst (VectorStoreMask src)); 23484 format %{ "vpabsd $dst,$src\n\t" 23485 "vpackusdw $dst,$dst,$dst\n\t" 23486 "vpackuswb $dst,$dst,$dst\t! store mask (1L to 1B)" %} 23487 ins_encode %{ 23488 int vector_len = 0; 23489 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23490 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23491 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23492 %} 23493 ins_pipe( pipe_slow ); 23494 %} 23495 23496 instruct storemask2l(vecS dst, vecX src) %{ 23497 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 23498 match(Set dst (VectorStoreMask src)); 23499 format %{ "vpshufd $dst,$src,0x8\n\t" 23500 "vpabsd $dst,$dst\n\t" 23501 "vpackusdw $dst,$dst,$dst\n\t" 23502 "vpackuswb $dst,$dst,$dst\t! 
store mask (2L to 2B)" %} 23503 ins_encode %{ 23504 int vector_len = 0; 23505 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8, vector_len); 23506 __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23507 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23508 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23509 %} 23510 ins_pipe( pipe_slow ); 23511 %} 23512 23513 instruct storemask4l(vecS dst, vecY src, rRegL scratch) %{ 23514 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 23515 match(Set dst (VectorStoreMask src)); 23516 effect(TEMP scratch, TEMP dst); 23517 format %{ "vmovdqu $dst,[0,2,4,6,1,3,5,7]\n\t" 23518 "vpermd $dst,$dst,$src," 23519 "vpabsd $dst,$dst\n\t" 23520 "vpackusdw $dst,$dst,$dst\n\t" 23521 "vpackuswb $dst,$dst,$dst\t! store mask (4L to 4B)" %} 23522 ins_encode %{ 23523 // vpermd and load are 256-bit, but all others are 128-bit instructions. 23524 int vector_len = 0; 23525 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_long_perm_mask()), $scratch$$Register); 23526 __ vpermd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister); 23527 __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23528 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23529 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23530 %} 23531 ins_pipe( pipe_slow ); 23532 %} 23533 23534 instruct storemask8l(vecD dst, vecZ src, rRegL scratch) %{ 23535 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 23536 match(Set dst (VectorStoreMask src)); 23537 effect(TEMP scratch); 23538 format %{ "vpcmpeqq k2,$src,0xFFFFFFFF\n\t" 23539 "vmovdqub $dst,k2,0x01010101\t! 
store mask (8L to 8B)" %} 23540 ins_encode %{ 23541 int vector_len = 2; 23542 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 23543 Assembler::ComparisonPredicate cp = Assembler::eq; 23544 __ evpcmpq(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 23545 // The dst is only 128-bit - thus we can do a smaller move. 23546 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 0, $scratch$$Register); 23547 %} 23548 ins_pipe( pipe_slow ); 23549 %} 23550 23551 //-------------------------------- LOAD_SHUFFLE ---------------------------------- 23552 23553 instruct loadshuffle8b(vecD dst, vecD src) %{ 23554 predicate(UseSSE > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 23555 match(Set dst (VectorLoadShuffle src)); 23556 format %{ "movdqu $dst, $src\t! load shuffle (load 8B for 8BRearrange)" %} 23557 ins_encode %{ 23558 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 23559 %} 23560 ins_pipe( pipe_slow ); 23561 %} 23562 23563 instruct loadshuffle16b(vecX dst, vecX src) %{ 23564 predicate(UseSSE > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 23565 match(Set dst (VectorLoadShuffle src)); 23566 format %{ "movdqu $dst, $src\t! load shuffle (load 16B for 16BRearrange)" %} 23567 ins_encode %{ 23568 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 23569 %} 23570 ins_pipe( pipe_slow ); 23571 %} 23572 23573 instruct loadshuffle32b(vecY dst, vecY src) %{ 23574 predicate(UseAVX > 0 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 23575 match(Set dst (VectorLoadShuffle src)); 23576 format %{ "vmovdqu $dst, $src\t! 
load shuffle (load 32B for 32BRearrange)" %} 23577 ins_encode %{ 23578 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 23579 %} 23580 ins_pipe( pipe_slow ); 23581 %} 23582 23583 instruct loadshuffle64b(vecZ dst, vecZ src) %{ 23584 predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 23585 match(Set dst (VectorLoadShuffle src)); 23586 format %{ "vmovdqu $dst, $src\t! load shuffle (load 64B for 64BRearrange)" %} 23587 ins_encode %{ 23588 __ evmovdqul($dst$$XMMRegister, $src$$XMMRegister, 2); 23589 %} 23590 ins_pipe( pipe_slow ); 23591 %} 23592 23593 instruct loadshuffle4s(vecD dst, vecS src, vecD tmp, vecD tmp2, rRegI scratch) %{ 23594 predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 23595 match(Set dst (VectorLoadShuffle src)); 23596 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); 23597 format %{ "pmovsxbw $tmp, $src \n\t" 23598 "movdqu $tmp2,0x0002000200020002\n\t" 23599 "pmullw $tmp,$tmp2\n\t" 23600 "movdqu $tmp2,$tmp\n\t" 23601 "psllw $tmp2,0x8\n\t" 23602 "paddb $tmp2,$tmp\n\t" 23603 "movdqu $tmp, 0x0100010001000100 \n\t" 23604 "paddb $tmp2,$tmp\n\t" 23605 "movdqu $dst, $tmp2\t! 
load shuffle (load 4B for 4SRearrange)" %} 23606 ins_encode %{ 23607 __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); 23608 __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_sizemask()), $scratch$$Register); 23609 __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); 23610 __ movdqu($tmp2$$XMMRegister, $tmp$$XMMRegister); 23611 __ psllw($tmp2$$XMMRegister, 0x8); 23612 __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); 23613 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); 23614 __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); 23615 __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); 23616 %} 23617 ins_pipe( pipe_slow ); 23618 %} 23619 23620 instruct loadshuffle8s(vecX dst, vecD src, vecX tmp, vecX tmp2, rRegI scratch) %{ 23621 predicate(UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 23622 match(Set dst (VectorLoadShuffle src)); 23623 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); 23624 format %{ "pmovsxbw $tmp, $src \n\t" 23625 "movdqu $tmp2,0x0002000200020002\n\t" 23626 "pmullw $tmp,$tmp2\n\t" 23627 "movdqu $tmp2,$tmp\n\t" 23628 "psllw $tmp2,0x8\n\t" 23629 "paddb $tmp2,$tmp\n\t" 23630 "movdqu $tmp, 0x0100010001000100 \n\t" 23631 "paddb $tmp2,$tmp\n\t" 23632 "movdqu $dst, $tmp2\t! 
load shuffle (load 8B for 8SRearrange)" %} 23633 ins_encode %{ 23634 __ pmovsxbw($tmp$$XMMRegister, $src$$XMMRegister); 23635 __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_sizemask()), $scratch$$Register); 23636 __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); 23637 __ movdqu($tmp2$$XMMRegister, $tmp$$XMMRegister); 23638 __ psllw($tmp2$$XMMRegister, 0x8); 23639 __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); 23640 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); 23641 __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); 23642 __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); 23643 %} 23644 ins_pipe( pipe_slow ); 23645 %} 23646 23647 instruct loadshuffle16s(vecY dst, vecX src) %{ 23648 predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 23649 match(Set dst (VectorLoadShuffle src)); 23650 format %{ "vpmovsxbw $dst,$src\t! load shuffle (load 16B for 16SRearrange)" %} 23651 ins_encode %{ 23652 int vector_len = 1; 23653 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23654 %} 23655 ins_pipe( pipe_slow ); 23656 %} 23657 23658 instruct loadshuffle32s(vecZ dst, vecY src) %{ 23659 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 23660 match(Set dst (VectorLoadShuffle src)); 23661 format %{ "vpmovsxbw $dst,$src\t! 
load shuffle (load 32B for 32SRearrange)" %} 23662 ins_encode %{ 23663 int vector_len = 2; 23664 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23665 %} 23666 ins_pipe( pipe_slow ); 23667 %} 23668 23669 instruct loadshuffle4i(vecX dst, vecS src, vecX tmp, vecX tmp2, rRegI scratch) %{ 23670 predicate(UseSSE > 3 && n->as_Vector()->length() == 4 && 23671 (n->bottom_type()->is_vect()->element_basic_type() == T_INT || 23672 n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); 23673 match(Set dst (VectorLoadShuffle src)); 23674 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); 23675 format %{ "vpmovsxbd $tmp, $src \n\t" 23676 "movdqu $tmp2, 0x0000000400000004 \n\t" 23677 "pmulld $tmp2, $tmp \n\t" 23678 "movdqu $tmp,$tmp2\n\t" 23679 "pslld $tmp2,0x8\n\t" 23680 "paddb $tmp2,$tmp\n\t" 23681 "pslld $tmp2,0x8\n\t" 23682 "paddb $tmp2,$tmp\n\t" 23683 "pslld $tmp2,0x8\n\t" 23684 "paddb $tmp2,$tmp\n\t" 23685 "movdqu $tmp, 0x0302010003020100 \n\t" 23686 "paddb $tmp2,$tmp\n\t" 23687 "movdqu $dst, $tmp2\t! 
load shuffle (load 4B for 4IRearrange)" %} 23688 ins_encode %{ 23689 __ vpmovsxbd($tmp$$XMMRegister, $src$$XMMRegister, 0); 23690 __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_int_sizemask()), $scratch$$Register); 23691 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 23692 __ movdqu($tmp$$XMMRegister, $tmp2$$XMMRegister); 23693 __ pslld($tmp2$$XMMRegister, 0x8); 23694 __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); 23695 __ pslld($tmp2$$XMMRegister, 0x8); 23696 __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); 23697 __ pslld($tmp2$$XMMRegister, 0x8); 23698 __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); 23699 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); 23700 __ paddb($tmp2$$XMMRegister, $tmp$$XMMRegister); 23701 __ movdqu($dst$$XMMRegister, $tmp2$$XMMRegister); 23702 %} 23703 ins_pipe( pipe_slow ); 23704 %} 23705 23706 instruct loadshuffle8i(vecY dst, vecD src) %{ 23707 predicate(UseAVX >= 1 && n->as_Vector()->length() == 8 && 23708 (n->bottom_type()->is_vect()->element_basic_type() == T_INT || 23709 n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); 23710 match(Set dst (VectorLoadShuffle src)); 23711 format %{ "vpmovsxbd $dst, $src\t! load shuffle (load 8B for 8IRearrange)" %} 23712 ins_encode %{ 23713 int vector_len = 1; 23714 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23715 %} 23716 ins_pipe( pipe_slow ); 23717 %} 23718 23719 instruct loadshuffle16i(vecZ dst, vecX src) %{ 23720 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 23721 (n->bottom_type()->is_vect()->element_basic_type() == T_INT || 23722 n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); 23723 match(Set dst (VectorLoadShuffle src)); 23724 format %{ "vpmovsxbd $dst, $src\t! 
load shuffle (load 16B for 16IRearrange)" %} 23725 ins_encode %{ 23726 int vector_len = 2; 23727 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23728 %} 23729 ins_pipe( pipe_slow ); 23730 %} 23731 23732 instruct loadshuffle4l(vecY dst, vecS src, vecY tmp, vecY tmp2, rRegI scratch) %{ 23733 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 23734 (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || 23735 n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); 23736 match(Set dst (VectorLoadShuffle src)); 23737 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); 23738 format %{ "vpmovsxbd $tmp2, $src \n\t" 23739 "movdqu $tmp, 0x0000000200000002 \n\t" 23740 "pmulld $tmp, $tmp2 \n\t" 23741 "vpmovsxdq $tmp2,$tmp\n\t" 23742 "vpsllq $tmp2,0x20\n\t" 23743 "vpaddd $tmp2,$tmp\n\t" 23744 "vmovdqu $tmp, 0x0000000100000000 \n\t" 23745 "vpaddd $tmp2,$tmp\n\t" 23746 "vmovdqu $dst, $tmp2\t! load shuffle (load 4L for 4LRearrange)" %} 23747 ins_encode %{ 23748 int vector_len = 1; 23749 __ vpmovsxbd($tmp2$$XMMRegister, $src$$XMMRegister, 0); 23750 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sizemask()), $scratch$$Register); 23751 __ pmulld($tmp$$XMMRegister, $tmp2$$XMMRegister); 23752 __ vpmovsxdq($tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 23753 __ vpsllq($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x20, vector_len); 23754 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 23755 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_shufflemask()), $scratch$$Register); 23756 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 23757 __ vmovdqu($dst$$XMMRegister, $tmp2$$XMMRegister); 23758 %} 23759 ins_pipe( pipe_slow ); 23760 %} 23761 23762 instruct loadshuffle8l(vecZ dst, vecD src, vecZ tmp, vecZ tmp2, rRegI scratch) %{ 23763 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 23764 (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || 23765 
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); 23766 match(Set dst (VectorLoadShuffle src)); 23767 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP scratch); 23768 format %{ "vpmovsxbd $tmp2, $src \n\t" 23769 "movdqu $tmp, 0x0000000200000002 \n\t" 23770 "pmulld $tmp, $tmp2\n\t" 23771 "vpmovsxdq $tmp2,$tmp\n\t" 23772 "vpsllq $tmp2,0x20\n\t" 23773 "vpaddd $tmp2,$tmp\n\t" 23774 "vmovdqu $tmp, 0x0000000100000000 \n\t" 23775 "vpaddd $tmp2,$tmp\n\t" 23776 "vmovdqu $dst, $tmp2\t! load shuffle (load 8L for 8LRearrange)" %} 23777 ins_encode %{ 23778 int vector_len = 2; 23779 __ vpmovsxbd($tmp2$$XMMRegister, $src$$XMMRegister, 1); 23780 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sizemask()), $scratch$$Register); 23781 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 1); 23782 __ vpmovsxdq($tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 23783 __ vpsllq($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x20, vector_len); 23784 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 23785 __ evmovdqul($tmp$$XMMRegister, k1, ExternalAddress(vector_long_shufflemask()), false, vector_len, $scratch$$Register); 23786 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 23787 __ evmovdqul($dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 23788 %} 23789 ins_pipe( pipe_slow ); 23790 %} 23791 //-------------------------------- Rearrange ------------------------------------- 23792 23793 instruct rearrange8b(vecD dst, vecD shuffle) %{ 23794 predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && 23795 n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 23796 match(Set dst (VectorRearrange dst shuffle)); 23797 effect(TEMP dst); 23798 format %{ "pshufb $dst, $shuffle\t! 
rearrange (8BRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Rearrange 16 bytes in-place: pshufb uses $shuffle as a byte index table.
instruct rearrange16b(vecX dst, vecX shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (16BRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32-byte rearrange needs cross-lane byte permute (AVX512_VBMI vpermb).
instruct rearrange32b(vecY dst, vecY src, vecY shuffle) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vbmi() && n->as_Vector()->length() == 32 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermb $dst, $shuffle\t! rearrange (32BRearrange)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange64b(vecZ dst, vecZ src, vecZ shuffle) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vbmi() && n->as_Vector()->length() == 64 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermb $dst, $shuffle\t!
rearrange (64BRearrange)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Rearrange 4 shorts in-place. The shuffle vector was pre-expanded to byte
// indices by loadshuffle4s, so plain pshufb suffices.
instruct rearrange4s(vecD dst, vecD shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (4SRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange8s(vecX dst, vecX shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (8SRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16-short rearrange needs cross-lane word permute (AVX512VL+BW vpermw).
instruct rearrange16s(vecY dst, vecY src, vecY shuffle) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vlbw() && n->as_Vector()->length() == 16 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermw $dst, $shuffle\t!
rearrange (16SRearrange)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermw($dst$$XMMRegister, k0, $shuffle$$XMMRegister, $src$$XMMRegister, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 32-short rearrange across the full 512-bit register (AVX512VL+BW vpermw,
// unmasked: mask register k0, merge=false).
instruct rearrange32s(vecZ dst, vecZ src, vecZ shuffle) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vlbw() && n->as_Vector()->length() == 32 &&
            n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermw $dst, $shuffle\t! rearrange (32SRearrange)" %}
  ins_encode %{
    int vector_len = 2;
    // Fix: a duplicated "__ __" macro token here would expand to invalid
    // generated C++; the masm prefix macro must appear exactly once.
    __ vpermw($dst$$XMMRegister, k0, $shuffle$$XMMRegister, $src$$XMMRegister, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Rearrange 4 ints in-place. The shuffle vector was pre-expanded to byte
// indices by loadshuffle4i, so plain pshufb suffices.
instruct rearrange4i(vecX dst, vecX shuffle) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorRearrange dst shuffle));
  effect(TEMP dst);
  format %{ "pshufb $dst, $shuffle\t! rearrange (4IRearrange)" %}
  ins_encode %{
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrange8i(vecY dst, vecY src, vecY shuffle) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst);
  format %{ "vpermd $dst, $src, $shuffle\t!
rearrange (8IRearrange)" %} 23913 ins_encode %{ 23914 int vector_len = 1; 23915 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister); 23916 %} 23917 ins_pipe( pipe_slow ); 23918 %} 23919 23920 instruct rearrange16i(vecZ dst, vecZ src, vecZ shuffle) %{ 23921 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 23922 (n->bottom_type()->is_vect()->element_basic_type() == T_INT || 23923 n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); 23924 match(Set dst (VectorRearrange src shuffle)); 23925 effect(TEMP dst); 23926 format %{ "vpermd $dst, $src, $shuffle\t! rearrange (16IRearrange)" %} 23927 ins_encode %{ 23928 int vector_len = 2; 23929 __ evpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len); 23930 %} 23931 ins_pipe( pipe_slow ); 23932 %} 23933 23934 instruct rearrange4l(vecY dst, vecY src, vecY shuffle) %{ 23935 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && 23936 (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || 23937 n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); 23938 match(Set dst (VectorRearrange src shuffle)); 23939 effect(TEMP dst); 23940 format %{ "vpermd $dst, $src, $shuffle\t! rearrange (4LRearrange)" %} 23941 ins_encode %{ 23942 int vector_len = 1; 23943 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister); 23944 %} 23945 ins_pipe( pipe_slow ); 23946 %} 23947 23948 instruct rearrange8l(vecZ dst, vecZ src, vecZ shuffle) %{ 23949 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && 23950 (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || 23951 n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); 23952 match(Set dst (VectorRearrange src shuffle)); 23953 effect(TEMP dst); 23954 format %{ "vpermd $dst, $src, $shuffle\t! 
rearrange (8LRearrange)" %} 23955 ins_encode %{ 23956 int vector_len = 2; 23957 __ evpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vector_len); 23958 %} 23959 ins_pipe( pipe_slow ); 23960 %} 23961 // --------------------------------- FMA -------------------------------------- 23962 23963 // a * b + c 23964 instruct vfma2D_reg(vecX a, vecX b, vecX c) %{ 23965 predicate(UseFMA && n->as_Vector()->length() == 2); 23966 match(Set c (FmaVD c (Binary a b))); 23967 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 23968 ins_cost(150); 23969 ins_encode %{ 23970 int vector_len = 0; 23971 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 23972 %} 23973 ins_pipe( pipe_slow ); 23974 %} 23975 23976 // a * b + c 23977 instruct vfma2D_mem(vecX a, memory b, vecX c) %{ 23978 predicate(UseFMA && n->as_Vector()->length() == 2); 23979 match(Set c (FmaVD c (Binary a (LoadVector b)))); 23980 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 23981 ins_cost(150); 23982 ins_encode %{ 23983 int vector_len = 0; 23984 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 23985 %} 23986 ins_pipe( pipe_slow ); 23987 %} 23988 23989 23990 // a * b + c 23991 instruct vfma4D_reg(vecY a, vecY b, vecY c) %{ 23992 predicate(UseFMA && n->as_Vector()->length() == 4); 23993 match(Set c (FmaVD c (Binary a b))); 23994 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 23995 ins_cost(150); 23996 ins_encode %{ 23997 int vector_len = 1; 23998 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 23999 %} 24000 ins_pipe( pipe_slow ); 24001 %} 24002 24003 // a * b + c 24004 instruct vfma4D_mem(vecY a, memory b, vecY c) %{ 24005 predicate(UseFMA && n->as_Vector()->length() == 4); 24006 match(Set c (FmaVD c (Binary a (LoadVector b)))); 24007 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 24008 ins_cost(150); 24009 ins_encode %{ 
24010 int vector_len = 1; 24011 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 24012 %} 24013 ins_pipe( pipe_slow ); 24014 %} 24015 24016 // a * b + c 24017 instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{ 24018 predicate(UseFMA && n->as_Vector()->length() == 8); 24019 match(Set c (FmaVD c (Binary a b))); 24020 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 24021 ins_cost(150); 24022 ins_encode %{ 24023 int vector_len = 2; 24024 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 24025 %} 24026 ins_pipe( pipe_slow ); 24027 %} 24028 24029 // a * b + c 24030 instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{ 24031 predicate(UseFMA && n->as_Vector()->length() == 8); 24032 match(Set c (FmaVD c (Binary a (LoadVector b)))); 24033 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 24034 ins_cost(150); 24035 ins_encode %{ 24036 int vector_len = 2; 24037 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 24038 %} 24039 ins_pipe( pipe_slow ); 24040 %} 24041 24042 // a * b + c 24043 instruct vfma2F_reg(vecD a, vecD b, vecD c) %{ 24044 predicate(UseFMA && n->as_Vector()->length() == 2); 24045 match(Set c (FmaVF c (Binary a b))); 24046 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed2F" %} 24047 ins_cost(150); 24048 ins_encode %{ 24049 int vector_len = 0; 24050 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 24051 %} 24052 ins_pipe( pipe_slow ); 24053 %} 24054 24055 // a * b + c 24056 instruct vfma2F_mem(vecD a, memory b, vecD c) %{ 24057 predicate(UseFMA && n->as_Vector()->length() == 2); 24058 match(Set c (FmaVF c (Binary a (LoadVector b)))); 24059 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed2F" %} 24060 ins_cost(150); 24061 ins_encode %{ 24062 int vector_len = 0; 24063 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 24064 %} 24065 
ins_pipe( pipe_slow ); 24066 %} 24067 24068 // a * b + c 24069 instruct vfma4F_reg(vecX a, vecX b, vecX c) %{ 24070 predicate(UseFMA && n->as_Vector()->length() == 4); 24071 match(Set c (FmaVF c (Binary a b))); 24072 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} 24073 ins_cost(150); 24074 ins_encode %{ 24075 int vector_len = 0; 24076 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 24077 %} 24078 ins_pipe( pipe_slow ); 24079 %} 24080 24081 // a * b + c 24082 instruct vfma4F_mem(vecX a, memory b, vecX c) %{ 24083 predicate(UseFMA && n->as_Vector()->length() == 4); 24084 match(Set c (FmaVF c (Binary a (LoadVector b)))); 24085 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} 24086 ins_cost(150); 24087 ins_encode %{ 24088 int vector_len = 0; 24089 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 24090 %} 24091 ins_pipe( pipe_slow ); 24092 %} 24093 24094 // a * b + c 24095 instruct vfma8F_reg(vecY a, vecY b, vecY c) %{ 24096 predicate(UseFMA && n->as_Vector()->length() == 8); 24097 match(Set c (FmaVF c (Binary a b))); 24098 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} 24099 ins_cost(150); 24100 ins_encode %{ 24101 int vector_len = 1; 24102 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 24103 %} 24104 ins_pipe( pipe_slow ); 24105 %} 24106 24107 // a * b + c 24108 instruct vfma8F_mem(vecY a, memory b, vecY c) %{ 24109 predicate(UseFMA && n->as_Vector()->length() == 8); 24110 match(Set c (FmaVF c (Binary a (LoadVector b)))); 24111 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} 24112 ins_cost(150); 24113 ins_encode %{ 24114 int vector_len = 1; 24115 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 24116 %} 24117 ins_pipe( pipe_slow ); 24118 %} 24119 24120 // a * b + c 24121 instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{ 24122 predicate(UseFMA && 
n->as_Vector()->length() == 16); 24123 match(Set c (FmaVF c (Binary a b))); 24124 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} 24125 ins_cost(150); 24126 ins_encode %{ 24127 int vector_len = 2; 24128 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 24129 %} 24130 ins_pipe( pipe_slow ); 24131 %} 24132 24133 // a * b + c 24134 instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{ 24135 predicate(UseFMA && n->as_Vector()->length() == 16); 24136 match(Set c (FmaVF c (Binary a (LoadVector b)))); 24137 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} 24138 ins_cost(150); 24139 ins_encode %{ 24140 int vector_len = 2; 24141 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 24142 %} 24143 ins_pipe( pipe_slow ); 24144 %} 24145 24146 // --------------------------------- PopCount -------------------------------------- 24147 24148 instruct vpopcount2I(vecD dst, vecD src) %{ 24149 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2); 24150 match(Set dst (PopCountVI src)); 24151 format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %} 24152 ins_encode %{ 24153 int vector_len = 0; 24154 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 24155 %} 24156 ins_pipe( pipe_slow ); 24157 %} 24158 24159 instruct vpopcount4I(vecX dst, vecX src) %{ 24160 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4); 24161 match(Set dst (PopCountVI src)); 24162 format %{ "vpopcntd $dst,$src\t! 
vector popcount packed4I" %} 24163 ins_encode %{ 24164 int vector_len = 0; 24165 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 24166 %} 24167 ins_pipe( pipe_slow ); 24168 %} 24169 24170 instruct vpopcount8I(vecY dst, vecY src) %{ 24171 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8); 24172 match(Set dst (PopCountVI src)); 24173 format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %} 24174 ins_encode %{ 24175 int vector_len = 1; 24176 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 24177 %} 24178 ins_pipe( pipe_slow ); 24179 %} 24180 24181 instruct vpopcount16I(vecZ dst, vecZ src) %{ 24182 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16); 24183 match(Set dst (PopCountVI src)); 24184 format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %} 24185 ins_encode %{ 24186 int vector_len = 2; 24187 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 24188 %} 24189 ins_pipe( pipe_slow ); 24190 %}