1 // 2 // Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 512-bit registers or 16 words each, labeled (a)-p. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // For pre EVEX enabled architectures: 68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX) 69 // For EVEX enabled architectures: 70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX). 
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM31 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, 
Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, 
SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( 
SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def 
XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, 
xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, 
xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, 
SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, 
xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, 
SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 
435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, 
xmm22->as_VMReg()->next(11));
// EVEX-only registers XMM22 (tail) .. XMM31: one reg_def per 32-bit word.
// Suffixes b-p map to as_VMReg()->next(1)..next(15) (16 words = 512 bits,
// matching the "labeled (a)-p" scheme described at the top of this section).
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

// CPU flags register.  Encoding 16 (LP64) / 8 (32-bit); it has no VMReg
// backing (VMRegImpl::Bad()), so it is never spilled to the stack.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// Allocation order for all XMM registers and their 32-bit words.
// XMM8 and above exist only on LP64 builds.
// NOTE(review): XMM16-XMM31 are listed unconditionally under _LP64 even though
// the head comment says they require EVEX encoding -- presumably the masks are
// trimmed at runtime for non-EVEX CPUs (cf. reg_class_dynamic below); confirm.
alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers (word 'a' of each XMM register)
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers (adds XMM16-XMM31 on LP64)
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

// Selects the evex variant when the CPU supports EVEX, else the legacy one.
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex double registers (words 'ab' of each XMM register)
reg_class double_reg_legacy(XMM0,  XMM0b,
                            XMM1,  XMM1b,
                            XMM2,  XMM2b,
                            XMM3,  XMM3b,
                            XMM4,  XMM4b,
                            XMM5,  XMM5b,
                            XMM6,  XMM6b,
                            XMM7,  XMM7b
#ifdef _LP64
                           ,XMM8,  XMM8b,
                            XMM9,  XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers (adds XMM16-XMM31 on LP64)
reg_class double_reg_evex(XMM0,  XMM0b,
                          XMM1,  XMM1b,
                          XMM2,  XMM2b,
                          XMM3,  XMM3b,
                          XMM4,  XMM4b,
                          XMM5,  XMM5b,
                          XMM6,  XMM6b,
                          XMM7,  XMM7b
#ifdef _LP64
                         ,XMM8,  XMM8b,
                          XMM9,  XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers (adds XMM16-XMM31 on LP64)
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 64bit vector registers (pre evex)
reg_class vectord_reg_legacy(XMM0,  XMM0b,
                             XMM1,  XMM1b,
                             XMM2,  XMM2b,
                             XMM3,  XMM3b,
                             XMM4,  XMM4b,
                             XMM5,  XMM5b,
                             XMM6,  XMM6b,
                             XMM7,  XMM7b
#ifdef _LP64
                            ,XMM8,  XMM8b,
                             XMM9,  XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for all 64bit vector registers (evex: adds XMM16-XMM31 on LP64)
reg_class vectord_reg_evex(XMM0,  XMM0b,
                           XMM1,  XMM1b,
                           XMM2,  XMM2b,
                           XMM3,  XMM3b,
                           XMM4,  XMM4b,
                           XMM5,  XMM5b,
                           XMM6,  XMM6b,
                           XMM7,  XMM7b
#ifdef _LP64
                          ,XMM8,  XMM8b,
                           XMM9,  XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 128bit vector registers (pre evex)
reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,
                             XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for all 128bit vector registers (evex: adds XMM16-XMM31 on LP64)
reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,
                           XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 256bit vector registers (pre evex)
reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                             XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                             XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                             XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                             XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                             XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                             XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                             XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                            ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                             XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers (evex: adds XMM16-XMM31 on LP64)
reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                           XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                           XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                           XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                           XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                           XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                           XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                           XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                          ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                           XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers.
// NOTE(review): no legacy/evex split here -- XMM16-XMM31 are included
// unconditionally under _LP64; presumably 512-bit vectors imply EVEX.  Confirm.
reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );

// Per-register singleton classes at 128-bit (4 words), 256-bit (8 words)
// and 512-bit (16 words) width.
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);

reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d);
reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h);
reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p);

reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d);
reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h);
reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p);

reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d);
reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h);
reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p);

reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d);
reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h);
reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p);

reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d);
reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h);
reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p);

reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d);
reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h);
reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p);

reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d);
reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h);
reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p);

#ifdef _LP64

reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d);
reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h);
reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p);

reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d);
reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h);
reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p);

reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d);
reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h);
reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p);

reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d);
reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h);
reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p);

reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d);
reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h);
reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p);

reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d);
reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h);
reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p);

reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d);
reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h);
reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p);

reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d);
reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);

reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d);
reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h);
reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p);

reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d);
reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h);
reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p);

reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d);
reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h);
reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p);

reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d);
reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h);
reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p);

reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d);
reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h);
reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p);

reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d);
reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h);
reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p);

reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d);
reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h);
reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p);

reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d);
reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h);
reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p);

reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d);
reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h);
reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p);

reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d);
reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h);
reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p);

reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d);
reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h);
reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);

reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d);
reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.
// Forward declaration: size_exception_handler()/size_deopt_handler() below
// reference NativeJump::instruction_size.
class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

// Platform-dependent sizing/emission of the exception and deopt handler
// stubs generated at the end of each compiled method.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
// Emit the exception handler stub: a single jump to the shared exception
// blob.  Returns the offset of the handler within the stub section, or 0
// if the code cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
// Pushes the current pc (so the deopt blob can find the deopt point) and
// jumps to the unpack entry of the deopt blob.  Must fit within
// size_deopt_handler() bytes (asserted below).
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  // 32-bit: the pc can be pushed directly as an immediate.
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
static address vector_float_signmask() { return StubRoutines::x86::vector_float_sign_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signmask() { return StubRoutines::x86::vector_double_sign_mask(); }
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_byte_bitset() { return StubRoutines::x86::vector_byte_bitset(); }
static address vector_long_perm_mask() { return StubRoutines::x86::vector_long_perm_mask(); }
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_all_ones_mask() { return StubRoutines::x86::vector_all_ones_mask(); }
#else
// 32-bit VM: masks live in locally defined constant pools rather than stubs.
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


// Returns whether the C2 matcher may use the match rule for this ideal
// opcode on the current CPU.  Starts from has_match_rule() and then vetoes
// opcodes whose rules need an ISA feature (SSE4.2, AVX, AVX-512, cx8, ...)
// that is absent or disabled by flags.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value  = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
    case Op_MulVL:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value;  // Per default match rules are supported.
}

// Vector-aware refinement of match_rule_supported(): additionally vetoes
// rules whose (element type, vector length) combination exceeds what the
// current CPU/flags allow.  size_in_bits is the total vector width.
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt, int op_arity) {
  // identify extra cases that we might want to provide match rules for
  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
    if (!vector_size_supported(bt, vlen)) {
      ret_value = false;
    } else if (size_in_bits > 256 && UseAVX <= 2) {
      // Only AVX512 supports 512-bit vectors
      ret_value = false;
    } else if (UseAVX == 0 && size_in_bits > 128) {
      // Only AVX supports 256-bit vectors
      ret_value = false;
    } else if (is_subword_type(bt) && size_in_bits == 512 && VM_Version::supports_avx512bw() == false) {
      // Byte and Short types are not supported in AVX512 if AVX512BW is not true.
      ret_value = false;
    } else {
      // Per-opcode implementation limitations.
      switch (opcode) {
        case Op_AbsV:
          if (is_integral_type(bt) && UseSSE < 3) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          else if (bt == T_LONG && UseAVX <= 2) { ret_value = false; } // Implementation limitation
          break;
        case Op_AddVB:
        case Op_SubVB:
          if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
            ret_value = false;
          break;
        case Op_MaxV:
        case Op_MinV:
          if (UseSSE < 4 && (bt == T_BYTE || bt == T_INT || bt == T_LONG))
            ret_value = false;
          break;
        case Op_MulVB:
          if (size_in_bits <= 128 && UseSSE < 4) { ret_value = false; }
          else if (size_in_bits > 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_LShiftVI:
        case Op_RShiftVI:
        case Op_URShiftVI:
          // Variable (per-lane) shifts require AVX2.
          if (op_arity == 2 && UseAVX <= 1)
            ret_value = false;
          break;
        case Op_LShiftVL:
        case Op_RShiftVL:
        case Op_URShiftVL:
          if (op_arity == 2 && UseAVX <= 1)
            ret_value = false;
          break;
        case Op_URShiftVS:
        case Op_RShiftVS:
        case Op_LShiftVS:
        case Op_MulVS:
        case Op_AddVS:
        case Op_SubVS:
          if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
            ret_value = false;
          break;
        case Op_CMoveVF:
          if (vlen != 8)
            ret_value = false;
          break;
        case Op_CMoveVD:
          if (vlen != 4)
            ret_value = false;
          break;
        case Op_AddReductionVI:
          if (bt == T_INT && UseSSE < 3) { ret_value = false; }
          else if (is_subword_type(bt) && UseSSE <= 3) { ret_value = false; }
          break;
        case Op_AndReductionV:
        case Op_OrReductionV:
        case Op_XorReductionV:
          if (bt == T_BYTE && UseSSE <= 3) { ret_value = false; }
          break;
        case Op_VectorMaskCmp:
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_MinReductionV:
        case Op_MaxReductionV:
          if ((bt == T_INT || bt == T_LONG || bt == T_BYTE) && UseSSE <= 3) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorBlend:
          if (UseSSE <= 3 && UseAVX == 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorTest:
          if (UseAVX <= 0) { ret_value = false; }
          else if (size_in_bits != 128 && size_in_bits != 256) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorLoadMask:
          if (UseSSE <= 3) { ret_value = false; }
          else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation
          else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorStoreMask:
          if (UseAVX < 0) { ret_value = false; } // Implementation limitation
          else if ((size_in_bits >= 256 || bt == T_LONG || bt == T_DOUBLE) && UseAVX < 2) { ret_value = false; } // Implementation limitation
          else if (vlen == 1 || vlen == 2) { ret_value = false; } // Implementation limitation
          else if (size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorCastB2X:
          if (UseAVX <= 0) { ret_value = false; }
          else if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorCastS2X:
          // bt is the destination element type; the second check guards the
          // 256-bit *source* (short) vector on pre-AVX2.
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          else if (is_integral_type(bt) && vlen * type2aelembytes(T_SHORT) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorCastI2X:
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          else if (is_integral_type(bt) && vlen * type2aelembytes(T_INT) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_VectorCastL2X:
          if (UseAVX <= 0) { ret_value = false; }
          else if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { ret_value = false; }
          else if (is_integral_type(bt) && vlen * type2aelembytes(T_LONG) * BitsPerByte == 256 && UseAVX < 2) { ret_value = false; }
          else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { ret_value = false; }
          break;
        case Op_VectorCastF2X:
          // Casts from FP to integral types require special fixup logic not easily
          // implementable with vectors.
          if (UseAVX <= 0) { ret_value = false; }
          else if (bt != T_DOUBLE) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorCastD2X:
          // Casts from FP to integral types require special fixup logic not easily
          // implementable with vectors.
          if (UseAVX <= 0) { ret_value = false; }
          else if (bt != T_FLOAT) { ret_value = false; } // Implementation limitation
          break;
        case Op_VectorReinterpret:
          if (size_in_bits >= 256 && UseAVX < 2) { ret_value = false; }
          break;
        case Op_MulReductionVI:
          if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { ret_value = false; }
          break;
        default:
          break;
      }
    }
  }
  if (ret_value) {
    assert(is_java_primitive(bt) && (vlen > 0) && is_power_of_2(vlen) &&
           vector_size_supported(bt, vlen), "must be supported");
  }

  return ret_value;  // Per default match rules are supported.
}

// AVX-512 with VL extension provides opmask-predicated vector instructions.
const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX2/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size,max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well
// Marks a (shift, optional ConvI2L) subtree as address-visited so it can be
// folded into a complex addressing mode (scaled index) instead of being
// computed into a register.  Returns true if the shift qualified.
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&   // x86 scale factor is at most 8 (<< 3)
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypass
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

// No address reshaping needed on x86.
void Compile::reshape_address(AddPNode* addp) {
}

// Helper methods for MachSpillCopyNode::implementation().
// Emits (or, with cbuf == NULL, prints/sizes) a register-to-register vector
// move of width 'ireg' (VecS..VecZ).  Returns the emitted/estimated size in
// bytes.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves" );
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

// Emits (or prints/sizes) a vector spill: load from or store to the stack
// slot at [rsp + stack_offset].  Returns the size in bytes.
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In 64-bit VM size calculation is very complex. Emitting instructions
  // into scratch buffer is used to get size in 64-bit VM.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecD:
        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecX:
        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecY:
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecD:
        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecX:
        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecY:
        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
        break;
      case Op_VecZ:
        __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
        break;
      default:
        ShouldNotReachHere();
      }
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  // Size estimation path (32-bit VM): account for EVEX compressed
  // displacement encoding when AVX-512 is in use.
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2 ) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
  int bit_width = width * 8;
  jlong val = con;
  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
  while(bit_width < 64) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  // Verifies (under -XX:+VerifyStackAtCalls) that the stack depth is
  // unchanged after a call by checking the magic cookie placed in the frame.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      MacroAssembler _masm(&cbuf);
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.
// Unsigned immediate operands immU1..immU6: integer constants that fit in
// N bits (predicate requires 0 <= value < 2^N).  Used where instructions
// take small unsigned immediates (shift counts, lane/select indices, etc.).

operand immU1() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU2() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU3() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU4() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU5() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(5));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU6() %{
  predicate(n->get_int() >= 0 && n->get_int() < nth_bit(6));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 512-bit vector register operand.
// This one generically applies only for evex, so only one version
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Comparison Code for FP conditional move.
// Encodings 0x0-0xE are the vcmppd predicate values; overflow/no_overflow
// are excluded by the predicate (see TODO note below on why they must
// still be listed for adlc).
operand cmpOp_vcmppd() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0, "eq");
    less         (0x1, "lt");
    less_equal   (0x2, "le");
    not_equal    (0xC, "ne");
    greater_equal(0xD, "ge");
    greater      (0xE, "gt");
    //TODO cannot compile (adlc breaks) without two next lines with error:
    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
    // equal' for overflow.
    overflow     (0x20, "o");  // not really supported by the instruction
    no_overflow  (0x21, "no"); // not really supported by the instruction
  %}
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// Halt node: emits ud2 (architecturally guaranteed invalid opcode) so that
// falling into unreachable code traps immediately.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "ud2\t# ShouldNotReachHere" %}
  ins_encode %{
    __ ud2();
  %}
  ins_pipe(pipe_slow);
%}

// =================================EVEX special===============================

// Sets the vector mask from an integer register; only matched when the
// target supports predicated (masked) vector operations.
instruct setMask(rRegI dst, rRegI src) %{
  predicate(Matcher::has_predicated_vectors());
  match(Set dst (SetVectMaskI src));
  effect(TEMP dst);
  format %{ "setvectmask $dst, $src" %}
  ins_encode %{
    __ setvectmask($dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// Scalar float add — destructive two-operand SSE forms (UseAVX == 0).

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

// Constant operand is materialized via the per-method constant table.
instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
ins_pipe(pipe_slow);
%}

// Scalar FP add/sub/mul/div.  Pattern for each op and type (F/D):
//   *_reg / *_mem / *_imm       destructive 2-operand SSE forms (UseAVX == 0)
//   *_reg_reg / *_reg_mem / *_reg_imm
//                               non-destructive 3-operand AVX forms (UseAVX > 0)
// _imm forms read the constant from the per-method constant table.

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar FP subtract.

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar FP multiply.

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar FP divide.

instruct divF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst src));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst (LoadF src)));

  format %{ "divss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (DivF dst con));
  format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  format %{ "vdivss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst src));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst (LoadD src)));

  format %{ "divsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (DivD dst con));
  format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  format %{ "vdivsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar FP absolute value, implemented by AND-masking off the sign bit
// with a constant from memory (float_signmask / double_signmask).

instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

// AVX 1/2 only (no AVX-512): three-operand vandps.
instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
// AVX-512 with VL extension: 128-bit EVEX-encoded vandps is available.
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

// AVX-512 without VL: needs the vabsss helper and an extra temp register.
instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

// NOTE(review): format string says "abs float" but this is the double
// (vabssd) variant — wording only, the encoding is correct.
instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()),
vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

// Integer negation — two-address neg on the destination register.
// EFLAGS is clobbered (effect KILL cr).

instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  match(Set dst (NegI dst));
  effect(KILL cr);

  format %{ "negl $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  match(Set dst (NegL dst));
  effect(KILL cr);

  // Fixed format comment: negq is the 64-bit (long) negate, not int.
  format %{ "negq $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Scalar FP negation, implemented by XOR-flipping the sign bit with a
// constant from memory (float_signflip / double_signflip).

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  // Fixed format string: the encoder emits vnegatesd (double); the format
  // previously claimed vnegatess (float), which misled disassembly output.
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// Scalar square root — sqrtss/sqrtsd with reg, mem and constant-table forms.

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_imm(regD dst, immD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD con));
  format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct onspinwait() %{
match(OnSpinWait);
  ins_cost(200);

  // Formatting only differs by MP-ness; the encoding always emits the
  // pause spin-loop hint.
  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"pause\t! membar_onspinwait"
    } else {
      $$emit$$"MEMBAR-onspinwait ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// Fused multiply-add, double: a * b + c (matched only when UseFMA is on;
// result lands in c, which is also the addend input).
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Fused multiply-add, float: a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR INSTRUCTIONS=====================================

// VectorReinterpret patterns: selected on the byte lengths of the result
// (n->bottom_type()) and the input (n->in(1)->bottom_type()).
// Same-size reinterprets are no-ops; widening forms mask the payload bits
// with vector_32/64_bit_mask so the upper lanes are zeroed.

// 4 -> 4 bytes: identity, no code emitted.
instruct reinterpretS(vecS dst) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

// 4 -> 8 bytes, SSE: load 32-bit mask then AND with the source.
instruct reinterpretS2D(vecD dst, vecS src, rRegL scratch) %{
  predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 -> 8 bytes, AVX: single three-operand vpand against the mask.
instruct reinterpretS2D_avx(vecD dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 4 -> 16 bytes, SSE.
instruct reinterpretS2X(vecX dst, vecS src, rRegL scratch) %{
  predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 4 -> 16 bytes, AVX.
// NOTE(review): unlike reinterpretS2D_avx this declares only TEMP scratch
// (no TEMP dst) — presumably intentional for the 3-operand form; verify.
instruct reinterpretS2X_avx(vecX dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 4 -> 32 bytes, AVX2 (256-bit vpand, vector_len = 1).
instruct reinterpretS2Y(vecY dst, vecS src, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 4 -> 64 bytes, AVX-512 (512-bit vpand, vector_len = 2).
instruct reinterpretS2Z(vecZ dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 8 -> 4 bytes (narrowing): plain register move, no masking needed.
instruct reinterpretD2S(vecS dst, vecD src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If register is the same, then move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// 8 -> 8 bytes: identity, no code emitted.
instruct reinterpretD(vecD dst) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ " # reinterpret $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

// 8 -> 16 bytes, SSE: mask the low 64 bits.
instruct reinterpretD2X(vecX dst, vecD src, rRegL scratch) %{
  predicate(UseAVX == 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 8 -> 16 bytes, AVX.
instruct reinterpretD2X_avx(vecX dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst, TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 8 -> 32 bytes, AVX2.
instruct reinterpretD2Y(vecY dst, vecD src, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 8 -> 64 bytes, AVX-512.
instruct reinterpretD2Z(vecZ dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP scratch);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_64_bit_mask()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 16 -> 4 bytes (narrowing): plain register move.
instruct reinterpretX2S(vecS dst, vecX src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If register is the same, then move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// 16 -> 8 bytes (narrowing): plain register move.
instruct reinterpretX2D(vecD dst, vecX src) %{
  predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ " # reinterpret $dst,$src" %}
  ins_encode %{
    // If register is the same, then move is not needed.
3233 if ($dst$$XMMRegister != $src$$XMMRegister) { 3234 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3235 } 3236 %} 3237 ins_pipe( pipe_slow ); 3238 %} 3239 3240 instruct reinterpretX(vecX dst) %{ 3241 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3242 match(Set dst (VectorReinterpret dst)); 3243 ins_cost(125); 3244 format %{ " # reinterpret $dst" %} 3245 ins_encode %{ 3246 // empty 3247 %} 3248 ins_pipe( pipe_slow ); 3249 %} 3250 3251 instruct reinterpretX2Y(vecY dst, vecX src) %{ 3252 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3253 match(Set dst (VectorReinterpret src)); 3254 ins_cost(125); 3255 effect(TEMP dst); 3256 format %{ " # reinterpret $dst,$src" %} 3257 ins_encode %{ 3258 int vector_len = 1; 3259 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3260 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // just 128-bits need moved 3261 %} 3262 ins_pipe( pipe_slow ); 3263 %} 3264 3265 instruct reinterpretX2Z(vecZ dst, vecX src) %{ 3266 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); 3267 match(Set dst (VectorReinterpret src)); 3268 ins_cost(125); 3269 effect(TEMP dst); 3270 format %{ " # reinterpret $dst,$src" %} 3271 ins_encode %{ 3272 int vector_len = 2; 3273 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3274 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); // just 128-bits need moved 3275 %} 3276 ins_pipe( pipe_slow ); 3277 %} 3278 3279 instruct reinterpretY2S(vecS dst, vecY src) %{ 3280 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3281 match(Set dst (VectorReinterpret src)); 3282 ins_cost(125); 3283 format %{ " # reinterpret $dst,$src" %} 3284 ins_encode %{ 3285 // If 
register is the same, then move is not needed. 3286 if ($dst$$XMMRegister != $src$$XMMRegister) { 3287 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3288 } 3289 %} 3290 ins_pipe( pipe_slow ); 3291 %} 3292 3293 instruct reinterpretY2D(vecD dst, vecY src) %{ 3294 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3295 match(Set dst (VectorReinterpret src)); 3296 ins_cost(125); 3297 format %{ " # reinterpret $dst,$src" %} 3298 ins_encode %{ 3299 // If register is the same, then move is not needed. 3300 if ($dst$$XMMRegister != $src$$XMMRegister) { 3301 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3302 } 3303 %} 3304 ins_pipe( pipe_slow ); 3305 %} 3306 3307 instruct reinterpretY2X(vecX dst, vecY src) %{ 3308 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3309 match(Set dst (VectorReinterpret src)); 3310 ins_cost(125); 3311 format %{ " # reinterpret $dst,$src" %} 3312 ins_encode %{ 3313 // If register is the same, then move is not needed. 
3314 if ($dst$$XMMRegister != $src$$XMMRegister) { 3315 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3316 } 3317 %} 3318 ins_pipe( pipe_slow ); 3319 %} 3320 3321 instruct reinterpretY(vecY dst) %{ 3322 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3323 match(Set dst (VectorReinterpret dst)); 3324 ins_cost(125); 3325 format %{ " # reinterpret $dst" %} 3326 ins_encode %{ 3327 // empty 3328 %} 3329 ins_pipe( pipe_slow ); 3330 %} 3331 3332 instruct reinterpretY2Z(vecZ dst, vecY src) %{ 3333 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 32); 3334 match(Set dst (VectorReinterpret src)); 3335 ins_cost(125); 3336 effect(TEMP dst); 3337 format %{ " # reinterpret $dst,$src" %} 3338 ins_encode %{ 3339 int vector_len = 2; 3340 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3341 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3342 %} 3343 ins_pipe( pipe_slow ); 3344 %} 3345 3346 instruct reinterpretZ2S(vecS dst, vecZ src) %{ 3347 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3348 match(Set dst (VectorReinterpret src)); 3349 ins_cost(125); 3350 format %{ " # reinterpret $dst,$src" %} 3351 ins_encode %{ 3352 // If register is the same, then move is not needed. 3353 if ($dst$$XMMRegister != $src$$XMMRegister) { 3354 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3355 } 3356 %} 3357 ins_pipe( pipe_slow ); 3358 %} 3359 3360 instruct reinterpretZ2D(vecD dst, vecZ src) %{ 3361 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3362 match(Set dst (VectorReinterpret src)); 3363 ins_cost(125); 3364 format %{ " # reinterpret $dst,$src" %} 3365 ins_encode %{ 3366 // If register is the same, then move is not needed. 
3367 if ($dst$$XMMRegister != $src$$XMMRegister) { 3368 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3369 } 3370 %} 3371 ins_pipe( pipe_slow ); 3372 %} 3373 3374 instruct reinterpretZ2X(vecX dst, vecZ src) %{ 3375 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3376 match(Set dst (VectorReinterpret src)); 3377 ins_cost(125); 3378 format %{ " # reinterpret $dst,$src" %} 3379 ins_encode %{ 3380 // If register is the same, then move is not needed. 3381 if ($dst$$XMMRegister != $src$$XMMRegister) { 3382 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 3383 } 3384 %} 3385 ins_pipe( pipe_slow ); 3386 %} 3387 3388 instruct reinterpretZ2Y(vecY dst, vecZ src) %{ 3389 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 32 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3390 match(Set dst (VectorReinterpret src)); 3391 ins_cost(125); 3392 format %{ " # reinterpret $dst,$src" %} 3393 ins_encode %{ 3394 // If register is the same, then move is not needed. 3395 if ($dst$$XMMRegister != $src$$XMMRegister) { 3396 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 3397 } 3398 %} 3399 ins_pipe( pipe_slow ); 3400 %} 3401 3402 instruct reinterpretZ(vecZ dst) %{ 3403 predicate(n->bottom_type()->is_vect()->length_in_bytes() == 64 && n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 64); 3404 match(Set dst (VectorReinterpret dst)); 3405 ins_cost(125); 3406 format %{ " # reinterpret $dst" %} 3407 ins_encode %{ 3408 // empty 3409 %} 3410 ins_pipe( pipe_slow ); 3411 %} 3412 3413 // ========== 3414 3415 // Load vectors (1 byte long) 3416 instruct loadV1(vecS dst, memory mem, rRegI tmp) %{ 3417 predicate(n->as_LoadVector()->memory_size() == 1); 3418 match(Set dst (LoadVector mem)); 3419 ins_cost(125); 3420 effect(TEMP tmp); 3421 format %{ "movzbl $tmp,$mem\n\t" 3422 "movd $dst,$tmp\t! 
// Load vectors (2 bytes long)
// As with 1-byte loads, bounce through a GP register (no 2-byte XMM load).
instruct loadV2(vecS dst, memory mem, rRegI tmp) %{
  predicate(n->as_LoadVector()->memory_size() == 2);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  effect(TEMP tmp);
  format %{ "movzwl $tmp,$mem\n\t"
            "movd $dst,$tmp\t! load vector (2 bytes)" %}
  ins_encode %{
    __ movzwl($tmp$$Register, $mem$$Address);
    __ movdl($dst$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (4 bytes long)
instruct loadV4(vecS dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movd $dst,$mem\t! load vector (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (8 bytes long)
instruct loadV8(vecD dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movq $dst,$mem\t! load vector (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (16 bytes long)
instruct loadV16(vecX dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (32 bytes long)
instruct loadV32(vecY dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 32);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
// EVEX-encoded; dword-granular form is used for element sizes <= 4 bytes.
instruct loadV64_dword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdqul $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Load vectors (64 bytes long)
// Qword-granular form for element sizes > 4 bytes.
instruct loadV64_qword(vecZ dst, memory mem) %{
  predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4);
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors
// Sub-dword stores go through a GP register, mirroring the loads above.
instruct storeV1(memory mem, vecS src, rRegI tmp) %{
  predicate(n->as_StoreVector()->memory_size() == 1);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  effect(TEMP tmp);
  format %{ "movd $tmp,$src\n\t"
            "movb $mem,$tmp\t! store vector (1 byte)" %}
  ins_encode %{
    __ movdl($tmp$$Register, $src$$XMMRegister);
    __ movb($mem$$Address, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV2(memory mem, vecS src, rRegI tmp) %{
  predicate(n->as_StoreVector()->memory_size() == 2);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  effect(TEMP tmp);
  format %{ "movd $tmp,$src\n\t"
            "movw $mem,$tmp\t! store vector (2 bytes)" %}
  ins_encode %{
    __ movdl($tmp$$Register, $src$$XMMRegister);
    __ movw($mem$$Address, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV4(memory mem, vecS src) %{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movd $mem,$src\t! store vector (4 bytes)" %}
  ins_encode %{
    __ movdl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV8(memory mem, vecD src) %{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movq $mem,$src\t! store vector (8 bytes)" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV16(memory mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %}
  ins_encode %{
    __ movdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV32(memory mem, vecY src) %{
  predicate(n->as_StoreVector()->memory_size() == 32);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
  ins_encode %{
    __ vmovdqu($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_dword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeV64_qword(memory mem, vecZ src) %{
  predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4);
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================LEGACY REPLICATE=======================================

// Byte replicate via punpcklbw + pshuflw (pre-AVX512VLBW shuffle sequence).
instruct Repl4B_mem(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "punpcklbw $dst,$mem\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}
replicate8B" %} 3635 ins_encode %{ 3636 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3637 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3638 %} 3639 ins_pipe( pipe_slow ); 3640 %} 3641 3642 instruct Repl16B(vecX dst, rRegI src) %{ 3643 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3644 match(Set dst (ReplicateB src)); 3645 format %{ "movd $dst,$src\n\t" 3646 "punpcklbw $dst,$dst\n\t" 3647 "pshuflw $dst,$dst,0x00\n\t" 3648 "punpcklqdq $dst,$dst\t! replicate16B" %} 3649 ins_encode %{ 3650 __ movdl($dst$$XMMRegister, $src$$Register); 3651 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3652 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3653 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3654 %} 3655 ins_pipe( pipe_slow ); 3656 %} 3657 3658 instruct Repl16B_mem(vecX dst, memory mem) %{ 3659 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3660 match(Set dst (ReplicateB (LoadB mem))); 3661 format %{ "punpcklbw $dst,$mem\n\t" 3662 "pshuflw $dst,$dst,0x00\n\t" 3663 "punpcklqdq $dst,$dst\t! replicate16B" %} 3664 ins_encode %{ 3665 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3666 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3667 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3668 %} 3669 ins_pipe( pipe_slow ); 3670 %} 3671 3672 instruct Repl32B(vecY dst, rRegI src) %{ 3673 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3674 match(Set dst (ReplicateB src)); 3675 format %{ "movd $dst,$src\n\t" 3676 "punpcklbw $dst,$dst\n\t" 3677 "pshuflw $dst,$dst,0x00\n\t" 3678 "punpcklqdq $dst,$dst\n\t" 3679 "vinserti128_high $dst,$dst\t! 
replicate32B" %} 3680 ins_encode %{ 3681 __ movdl($dst$$XMMRegister, $src$$Register); 3682 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3683 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3684 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3685 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3686 %} 3687 ins_pipe( pipe_slow ); 3688 %} 3689 3690 instruct Repl32B_mem(vecY dst, memory mem) %{ 3691 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3692 match(Set dst (ReplicateB (LoadB mem))); 3693 format %{ "punpcklbw $dst,$mem\n\t" 3694 "pshuflw $dst,$dst,0x00\n\t" 3695 "punpcklqdq $dst,$dst\n\t" 3696 "vinserti128_high $dst,$dst\t! replicate32B" %} 3697 ins_encode %{ 3698 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3699 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3700 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3701 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3702 %} 3703 ins_pipe( pipe_slow ); 3704 %} 3705 3706 instruct Repl16B_imm(vecX dst, immI con) %{ 3707 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3708 match(Set dst (ReplicateB con)); 3709 format %{ "movq $dst,[$constantaddress]\n\t" 3710 "punpcklqdq $dst,$dst\t! replicate16B($con)" %} 3711 ins_encode %{ 3712 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3713 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3714 %} 3715 ins_pipe( pipe_slow ); 3716 %} 3717 3718 instruct Repl32B_imm(vecY dst, immI con) %{ 3719 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3720 match(Set dst (ReplicateB con)); 3721 format %{ "movq $dst,[$constantaddress]\n\t" 3722 "punpcklqdq $dst,$dst\n\t" 3723 "vinserti128_high $dst,$dst\t! 
lreplicate32B($con)" %} 3724 ins_encode %{ 3725 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3726 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3727 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3728 %} 3729 ins_pipe( pipe_slow ); 3730 %} 3731 3732 instruct Repl4S(vecD dst, rRegI src) %{ 3733 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3734 match(Set dst (ReplicateS src)); 3735 format %{ "movd $dst,$src\n\t" 3736 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3737 ins_encode %{ 3738 __ movdl($dst$$XMMRegister, $src$$Register); 3739 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3740 %} 3741 ins_pipe( pipe_slow ); 3742 %} 3743 3744 instruct Repl4S_mem(vecD dst, memory mem) %{ 3745 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3746 match(Set dst (ReplicateS (LoadS mem))); 3747 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3748 ins_encode %{ 3749 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3750 %} 3751 ins_pipe( pipe_slow ); 3752 %} 3753 3754 instruct Repl8S(vecX dst, rRegI src) %{ 3755 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3756 match(Set dst (ReplicateS src)); 3757 format %{ "movd $dst,$src\n\t" 3758 "pshuflw $dst,$dst,0x00\n\t" 3759 "punpcklqdq $dst,$dst\t! replicate8S" %} 3760 ins_encode %{ 3761 __ movdl($dst$$XMMRegister, $src$$Register); 3762 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3763 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3764 %} 3765 ins_pipe( pipe_slow ); 3766 %} 3767 3768 instruct Repl8S_mem(vecX dst, memory mem) %{ 3769 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3770 match(Set dst (ReplicateS (LoadS mem))); 3771 format %{ "pshuflw $dst,$mem,0x00\n\t" 3772 "punpcklqdq $dst,$dst\t! 
replicate8S" %} 3773 ins_encode %{ 3774 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3775 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3776 %} 3777 ins_pipe( pipe_slow ); 3778 %} 3779 3780 instruct Repl8S_imm(vecX dst, immI con) %{ 3781 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3782 match(Set dst (ReplicateS con)); 3783 format %{ "movq $dst,[$constantaddress]\n\t" 3784 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3785 ins_encode %{ 3786 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3787 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3788 %} 3789 ins_pipe( pipe_slow ); 3790 %} 3791 3792 instruct Repl16S(vecY dst, rRegI src) %{ 3793 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3794 match(Set dst (ReplicateS src)); 3795 format %{ "movd $dst,$src\n\t" 3796 "pshuflw $dst,$dst,0x00\n\t" 3797 "punpcklqdq $dst,$dst\n\t" 3798 "vinserti128_high $dst,$dst\t! replicate16S" %} 3799 ins_encode %{ 3800 __ movdl($dst$$XMMRegister, $src$$Register); 3801 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3802 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3803 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3804 %} 3805 ins_pipe( pipe_slow ); 3806 %} 3807 3808 instruct Repl16S_mem(vecY dst, memory mem) %{ 3809 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3810 match(Set dst (ReplicateS (LoadS mem))); 3811 format %{ "pshuflw $dst,$mem,0x00\n\t" 3812 "punpcklqdq $dst,$dst\n\t" 3813 "vinserti128_high $dst,$dst\t! 
replicate16S" %} 3814 ins_encode %{ 3815 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3816 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3817 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3818 %} 3819 ins_pipe( pipe_slow ); 3820 %} 3821 3822 instruct Repl16S_imm(vecY dst, immI con) %{ 3823 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3824 match(Set dst (ReplicateS con)); 3825 format %{ "movq $dst,[$constantaddress]\n\t" 3826 "punpcklqdq $dst,$dst\n\t" 3827 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3828 ins_encode %{ 3829 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3830 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3831 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3832 %} 3833 ins_pipe( pipe_slow ); 3834 %} 3835 3836 instruct Repl4I(vecX dst, rRegI src) %{ 3837 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3838 match(Set dst (ReplicateI src)); 3839 format %{ "movd $dst,$src\n\t" 3840 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3841 ins_encode %{ 3842 __ movdl($dst$$XMMRegister, $src$$Register); 3843 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3844 %} 3845 ins_pipe( pipe_slow ); 3846 %} 3847 3848 instruct Repl4I_mem(vecX dst, memory mem) %{ 3849 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3850 match(Set dst (ReplicateI (LoadI mem))); 3851 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3852 ins_encode %{ 3853 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3854 %} 3855 ins_pipe( pipe_slow ); 3856 %} 3857 3858 instruct Repl8I(vecY dst, rRegI src) %{ 3859 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3860 match(Set dst (ReplicateI src)); 3861 format %{ "movd $dst,$src\n\t" 3862 "pshufd $dst,$dst,0x00\n\t" 3863 "vinserti128_high $dst,$dst\t! 
replicate8I" %} 3864 ins_encode %{ 3865 __ movdl($dst$$XMMRegister, $src$$Register); 3866 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3867 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3868 %} 3869 ins_pipe( pipe_slow ); 3870 %} 3871 3872 instruct Repl8I_mem(vecY dst, memory mem) %{ 3873 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3874 match(Set dst (ReplicateI (LoadI mem))); 3875 format %{ "pshufd $dst,$mem,0x00\n\t" 3876 "vinserti128_high $dst,$dst\t! replicate8I" %} 3877 ins_encode %{ 3878 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3879 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3880 %} 3881 ins_pipe( pipe_slow ); 3882 %} 3883 3884 instruct Repl4I_imm(vecX dst, immI con) %{ 3885 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3886 match(Set dst (ReplicateI con)); 3887 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 3888 "punpcklqdq $dst,$dst" %} 3889 ins_encode %{ 3890 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3891 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3892 %} 3893 ins_pipe( pipe_slow ); 3894 %} 3895 3896 instruct Repl8I_imm(vecY dst, immI con) %{ 3897 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3898 match(Set dst (ReplicateI con)); 3899 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 3900 "punpcklqdq $dst,$dst\n\t" 3901 "vinserti128_high $dst,$dst" %} 3902 ins_encode %{ 3903 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 3904 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3905 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3906 %} 3907 ins_pipe( pipe_slow ); 3908 %} 3909 3910 // Long could be loaded into xmm register directly from memory. 
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// 32-bit VM: assemble the long from its lo/hi GP register halves.
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float replicates: pshufd broadcasts a 32-bit lane.
instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Zero replicates: a register xor'ed with itself is all zeros.
instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8F_zero(vecY dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Double replicates: pshufd 0x44 duplicates the low qword.
instruct Repl2D_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D(vecY dst, regD src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4D_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateD (LoadD mem)));
  format %{ "pshufd $dst,$mem,0x44\n\t"
            "vinsertf128_high $dst,$dst\t! replicate4D" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4D_zero(vecY dst, immD0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateD zero));
  format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================GENERIC REPLICATE==========================================

// Replicate byte scalar to be vector
instruct Repl4B(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  format %{ "movd $dst,$src\n\t"
            "punpcklbw $dst,$dst\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar immediate to be vector by loading from const table.
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! 
replicate16B zero" %} 4195 ins_encode %{ 4196 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4197 %} 4198 ins_pipe( fpu_reg_reg ); 4199 %} 4200 4201 instruct Repl32B_zero(vecY dst, immI0 zero) %{ 4202 predicate(n->as_Vector()->length() == 32); 4203 match(Set dst (ReplicateB zero)); 4204 format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %} 4205 ins_encode %{ 4206 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4207 int vector_len = 1; 4208 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4209 %} 4210 ins_pipe( fpu_reg_reg ); 4211 %} 4212 4213 // Replicate char/short (2 byte) scalar to be vector 4214 instruct Repl2S(vecS dst, rRegI src) %{ 4215 predicate(n->as_Vector()->length() == 2); 4216 match(Set dst (ReplicateS src)); 4217 format %{ "movd $dst,$src\n\t" 4218 "pshuflw $dst,$dst,0x00\t! replicate2S" %} 4219 ins_encode %{ 4220 __ movdl($dst$$XMMRegister, $src$$Register); 4221 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4222 %} 4223 ins_pipe( fpu_reg_reg ); 4224 %} 4225 4226 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 4227 instruct Repl2S_imm(vecS dst, immI con) %{ 4228 predicate(n->as_Vector()->length() == 2); 4229 match(Set dst (ReplicateS con)); 4230 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 4231 ins_encode %{ 4232 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 4233 %} 4234 ins_pipe( fpu_reg_reg ); 4235 %} 4236 4237 instruct Repl4S_imm(vecD dst, immI con) %{ 4238 predicate(n->as_Vector()->length() == 4); 4239 match(Set dst (ReplicateS con)); 4240 format %{ "movq $dst,[$constantaddress]\t! 
replicate4S($con)" %} 4241 ins_encode %{ 4242 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4243 %} 4244 ins_pipe( fpu_reg_reg ); 4245 %} 4246 4247 // Replicate char/short (2 byte) scalar zero to be vector 4248 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 4249 predicate(n->as_Vector()->length() == 2); 4250 match(Set dst (ReplicateS zero)); 4251 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 4252 ins_encode %{ 4253 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4254 %} 4255 ins_pipe( fpu_reg_reg ); 4256 %} 4257 4258 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 4259 predicate(n->as_Vector()->length() == 4); 4260 match(Set dst (ReplicateS zero)); 4261 format %{ "pxor $dst,$dst\t! replicate4S zero" %} 4262 ins_encode %{ 4263 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4264 %} 4265 ins_pipe( fpu_reg_reg ); 4266 %} 4267 4268 instruct Repl8S_zero(vecX dst, immI0 zero) %{ 4269 predicate(n->as_Vector()->length() == 8); 4270 match(Set dst (ReplicateS zero)); 4271 format %{ "pxor $dst,$dst\t! replicate8S zero" %} 4272 ins_encode %{ 4273 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4274 %} 4275 ins_pipe( fpu_reg_reg ); 4276 %} 4277 4278 instruct Repl16S_zero(vecY dst, immI0 zero) %{ 4279 predicate(n->as_Vector()->length() == 16); 4280 match(Set dst (ReplicateS zero)); 4281 format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %} 4282 ins_encode %{ 4283 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4284 int vector_len = 1; 4285 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4286 %} 4287 ins_pipe( fpu_reg_reg ); 4288 %} 4289 4290 // Replicate integer (4 byte) scalar to be vector 4291 instruct Repl2I(vecD dst, rRegI src) %{ 4292 predicate(n->as_Vector()->length() == 2); 4293 match(Set dst (ReplicateI src)); 4294 format %{ "movd $dst,$src\n\t" 4295 "pshufd $dst,$dst,0x00\t! 
replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  // Fixed format text: was "replicate2I", now says "zero" like the other *_zero instructs.
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  // Fixed format text: dropped the stray ')' that was after "zero".
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! 
replicate8I zero" %} 4352 ins_encode %{ 4353 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 4354 int vector_len = 1; 4355 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4356 %} 4357 ins_pipe( fpu_reg_reg ); 4358 %} 4359 4360 // Replicate long (8 byte) scalar to be vector 4361 #ifdef _LP64 4362 instruct Repl2L(vecX dst, rRegL src) %{ 4363 predicate(n->as_Vector()->length() == 2); 4364 match(Set dst (ReplicateL src)); 4365 format %{ "movdq $dst,$src\n\t" 4366 "punpcklqdq $dst,$dst\t! replicate2L" %} 4367 ins_encode %{ 4368 __ movdq($dst$$XMMRegister, $src$$Register); 4369 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4370 %} 4371 ins_pipe( pipe_slow ); 4372 %} 4373 #else // _LP64 4374 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 4375 predicate(n->as_Vector()->length() == 2); 4376 match(Set dst (ReplicateL src)); 4377 effect(TEMP dst, USE src, TEMP tmp); 4378 format %{ "movdl $dst,$src.lo\n\t" 4379 "movdl $tmp,$src.hi\n\t" 4380 "punpckldq $dst,$tmp\n\t" 4381 "punpcklqdq $dst,$dst\t! replicate2L"%} 4382 ins_encode %{ 4383 __ movdl($dst$$XMMRegister, $src$$Register); 4384 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4385 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4386 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4387 %} 4388 ins_pipe( pipe_slow ); 4389 %} 4390 #endif // _LP64 4391 4392 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4393 instruct Repl2L_imm(vecX dst, immL con) %{ 4394 predicate(n->as_Vector()->length() == 2); 4395 match(Set dst (ReplicateL con)); 4396 format %{ "movq $dst,[$constantaddress]\n\t" 4397 "punpcklqdq $dst,$dst\t! 
replicate2L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar zero to be vector
instruct Repl2L_zero(vecX dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL zero));
  format %{ "pxor $dst,$dst\t! replicate2L zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate float (4 byte) scalar to be vector
instruct Repl2F(vecD dst, regF src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  // Fixed format text: the encoding reads $src, so print $src (was "$dst,$dst").
  format %{ "pshufd $dst,$src,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F(vecX dst, regF src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  // Fixed format text: the encoding reads $src, so print $src (was "$dst,$dst").
  format %{ "pshufd $dst,$src,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  format %{ "pshufd $dst,$src,0x44\t! 
replicate2D" %} 4454 ins_encode %{ 4455 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4456 %} 4457 ins_pipe( pipe_slow ); 4458 %} 4459 4460 // ====================EVEX REPLICATE============================================= 4461 4462 instruct Repl4B_mem_evex(vecS dst, memory mem) %{ 4463 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4464 match(Set dst (ReplicateB (LoadB mem))); 4465 format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} 4466 ins_encode %{ 4467 int vector_len = 0; 4468 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4469 %} 4470 ins_pipe( pipe_slow ); 4471 %} 4472 4473 instruct Repl8B_mem_evex(vecD dst, memory mem) %{ 4474 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4475 match(Set dst (ReplicateB (LoadB mem))); 4476 format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} 4477 ins_encode %{ 4478 int vector_len = 0; 4479 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4480 %} 4481 ins_pipe( pipe_slow ); 4482 %} 4483 4484 instruct Repl16B_evex(vecX dst, rRegI src) %{ 4485 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4486 match(Set dst (ReplicateB src)); 4487 format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} 4488 ins_encode %{ 4489 int vector_len = 0; 4490 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 instruct Repl16B_mem_evex(vecX dst, memory mem) %{ 4496 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4497 match(Set dst (ReplicateB (LoadB mem))); 4498 format %{ "vpbroadcastb $dst,$mem\t! 
replicate16B" %} 4499 ins_encode %{ 4500 int vector_len = 0; 4501 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4502 %} 4503 ins_pipe( pipe_slow ); 4504 %} 4505 4506 instruct Repl32B_evex(vecY dst, rRegI src) %{ 4507 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4508 match(Set dst (ReplicateB src)); 4509 format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} 4510 ins_encode %{ 4511 int vector_len = 1; 4512 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4513 %} 4514 ins_pipe( pipe_slow ); 4515 %} 4516 4517 instruct Repl32B_mem_evex(vecY dst, memory mem) %{ 4518 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4519 match(Set dst (ReplicateB (LoadB mem))); 4520 format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} 4521 ins_encode %{ 4522 int vector_len = 1; 4523 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4524 %} 4525 ins_pipe( pipe_slow ); 4526 %} 4527 4528 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 4529 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4530 match(Set dst (ReplicateB src)); 4531 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 4532 ins_encode %{ 4533 int vector_len = 2; 4534 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 4535 %} 4536 ins_pipe( pipe_slow ); 4537 %} 4538 4539 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 4540 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4541 match(Set dst (ReplicateB (LoadB mem))); 4542 format %{ "vpbroadcastb $dst,$mem\t! 
replicate64B" %} 4543 ins_encode %{ 4544 int vector_len = 2; 4545 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 4546 %} 4547 ins_pipe( pipe_slow ); 4548 %} 4549 4550 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 4551 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4552 match(Set dst (ReplicateB con)); 4553 format %{ "movq $dst,[$constantaddress]\n\t" 4554 "vpbroadcastb $dst,$dst\t! replicate16B" %} 4555 ins_encode %{ 4556 int vector_len = 0; 4557 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4558 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4559 %} 4560 ins_pipe( pipe_slow ); 4561 %} 4562 4563 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 4564 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 4565 match(Set dst (ReplicateB con)); 4566 format %{ "movq $dst,[$constantaddress]\n\t" 4567 "vpbroadcastb $dst,$dst\t! replicate32B" %} 4568 ins_encode %{ 4569 int vector_len = 1; 4570 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4571 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4572 %} 4573 ins_pipe( pipe_slow ); 4574 %} 4575 4576 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 4577 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 4578 match(Set dst (ReplicateB con)); 4579 format %{ "movq $dst,[$constantaddress]\n\t" 4580 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 4581 ins_encode %{ 4582 int vector_len = 2; 4583 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 4584 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4585 %} 4586 ins_pipe( pipe_slow ); 4587 %} 4588 4589 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 4590 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 4591 match(Set dst (ReplicateB zero)); 4592 format %{ "vpxor $dst k0,$dst,$dst\t! 
replicate64B zero" %} 4593 ins_encode %{ 4594 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4595 int vector_len = 2; 4596 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4597 %} 4598 ins_pipe( fpu_reg_reg ); 4599 %} 4600 4601 instruct Repl4S_evex(vecD dst, rRegI src) %{ 4602 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4603 match(Set dst (ReplicateS src)); 4604 format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} 4605 ins_encode %{ 4606 int vector_len = 0; 4607 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4608 %} 4609 ins_pipe( pipe_slow ); 4610 %} 4611 4612 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 4613 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 4614 match(Set dst (ReplicateS (LoadS mem))); 4615 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 4616 ins_encode %{ 4617 int vector_len = 0; 4618 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4619 %} 4620 ins_pipe( pipe_slow ); 4621 %} 4622 4623 instruct Repl8S_evex(vecX dst, rRegI src) %{ 4624 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4625 match(Set dst (ReplicateS src)); 4626 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 4627 ins_encode %{ 4628 int vector_len = 0; 4629 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4630 %} 4631 ins_pipe( pipe_slow ); 4632 %} 4633 4634 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 4635 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4636 match(Set dst (ReplicateS (LoadS mem))); 4637 format %{ "vpbroadcastw $dst,$mem\t! 
replicate8S" %} 4638 ins_encode %{ 4639 int vector_len = 0; 4640 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4641 %} 4642 ins_pipe( pipe_slow ); 4643 %} 4644 4645 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4646 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4647 match(Set dst (ReplicateS src)); 4648 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4649 ins_encode %{ 4650 int vector_len = 1; 4651 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4652 %} 4653 ins_pipe( pipe_slow ); 4654 %} 4655 4656 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4657 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4658 match(Set dst (ReplicateS (LoadS mem))); 4659 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4660 ins_encode %{ 4661 int vector_len = 1; 4662 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4663 %} 4664 ins_pipe( pipe_slow ); 4665 %} 4666 4667 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4668 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4669 match(Set dst (ReplicateS src)); 4670 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4671 ins_encode %{ 4672 int vector_len = 2; 4673 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4674 %} 4675 ins_pipe( pipe_slow ); 4676 %} 4677 4678 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4679 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4680 match(Set dst (ReplicateS (LoadS mem))); 4681 format %{ "vpbroadcastw $dst,$mem\t! 
replicate32S" %} 4682 ins_encode %{ 4683 int vector_len = 2; 4684 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4685 %} 4686 ins_pipe( pipe_slow ); 4687 %} 4688 4689 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4690 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4691 match(Set dst (ReplicateS con)); 4692 format %{ "movq $dst,[$constantaddress]\n\t" 4693 "vpbroadcastw $dst,$dst\t! replicate8S" %} 4694 ins_encode %{ 4695 int vector_len = 0; 4696 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4697 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4698 %} 4699 ins_pipe( pipe_slow ); 4700 %} 4701 4702 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4703 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4704 match(Set dst (ReplicateS con)); 4705 format %{ "movq $dst,[$constantaddress]\n\t" 4706 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4707 ins_encode %{ 4708 int vector_len = 1; 4709 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4710 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4711 %} 4712 ins_pipe( pipe_slow ); 4713 %} 4714 4715 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4716 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4717 match(Set dst (ReplicateS con)); 4718 format %{ "movq $dst,[$constantaddress]\n\t" 4719 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4720 ins_encode %{ 4721 int vector_len = 2; 4722 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4723 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4724 %} 4725 ins_pipe( pipe_slow ); 4726 %} 4727 4728 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4729 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4730 match(Set dst (ReplicateS zero)); 4731 format %{ "vpxor $dst k0,$dst,$dst\t! 
replicate32S zero" %} 4732 ins_encode %{ 4733 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4734 int vector_len = 2; 4735 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4736 %} 4737 ins_pipe( fpu_reg_reg ); 4738 %} 4739 4740 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4741 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4742 match(Set dst (ReplicateI src)); 4743 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 4744 ins_encode %{ 4745 int vector_len = 0; 4746 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4747 %} 4748 ins_pipe( pipe_slow ); 4749 %} 4750 4751 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4752 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4753 match(Set dst (ReplicateI (LoadI mem))); 4754 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4755 ins_encode %{ 4756 int vector_len = 0; 4757 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4758 %} 4759 ins_pipe( pipe_slow ); 4760 %} 4761 4762 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4763 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4764 match(Set dst (ReplicateI src)); 4765 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4766 ins_encode %{ 4767 int vector_len = 1; 4768 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4769 %} 4770 ins_pipe( pipe_slow ); 4771 %} 4772 4773 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4774 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4775 match(Set dst (ReplicateI (LoadI mem))); 4776 format %{ "vpbroadcastd $dst,$mem\t! 
replicate8I" %} 4777 ins_encode %{ 4778 int vector_len = 1; 4779 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4780 %} 4781 ins_pipe( pipe_slow ); 4782 %} 4783 4784 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4785 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4786 match(Set dst (ReplicateI src)); 4787 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4788 ins_encode %{ 4789 int vector_len = 2; 4790 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4791 %} 4792 ins_pipe( pipe_slow ); 4793 %} 4794 4795 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4796 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4797 match(Set dst (ReplicateI (LoadI mem))); 4798 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4799 ins_encode %{ 4800 int vector_len = 2; 4801 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4802 %} 4803 ins_pipe( pipe_slow ); 4804 %} 4805 4806 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4807 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4808 match(Set dst (ReplicateI con)); 4809 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4810 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4811 ins_encode %{ 4812 int vector_len = 0; 4813 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4814 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4815 %} 4816 ins_pipe( pipe_slow ); 4817 %} 4818 4819 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4820 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4821 match(Set dst (ReplicateI con)); 4822 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4823 "vpbroadcastd $dst,$dst\t! 
replicate8I" %} 4824 ins_encode %{ 4825 int vector_len = 1; 4826 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4827 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4828 %} 4829 ins_pipe( pipe_slow ); 4830 %} 4831 4832 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4833 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4834 match(Set dst (ReplicateI con)); 4835 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4836 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4837 ins_encode %{ 4838 int vector_len = 2; 4839 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4840 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4841 %} 4842 ins_pipe( pipe_slow ); 4843 %} 4844 4845 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4846 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4847 match(Set dst (ReplicateI zero)); 4848 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4849 ins_encode %{ 4850 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4851 int vector_len = 2; 4852 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4853 %} 4854 ins_pipe( fpu_reg_reg ); 4855 %} 4856 4857 // Replicate long (8 byte) scalar to be vector 4858 #ifdef _LP64 4859 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4860 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4861 match(Set dst (ReplicateL src)); 4862 format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} 4863 ins_encode %{ 4864 int vector_len = 1; 4865 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4866 %} 4867 ins_pipe( pipe_slow ); 4868 %} 4869 4870 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4871 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4872 match(Set dst (ReplicateL src)); 4873 format %{ "vpbroadcastq $dst,$src\t! 
replicate8L" %} 4874 ins_encode %{ 4875 int vector_len = 2; 4876 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4877 %} 4878 ins_pipe( pipe_slow ); 4879 %} 4880 #else // _LP64 4881 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4882 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4883 match(Set dst (ReplicateL src)); 4884 effect(TEMP dst, USE src, TEMP tmp); 4885 format %{ "movdl $dst,$src.lo\n\t" 4886 "movdl $tmp,$src.hi\n\t" 4887 "punpckldq $dst,$tmp\n\t" 4888 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4889 ins_encode %{ 4890 int vector_len = 1; 4891 __ movdl($dst$$XMMRegister, $src$$Register); 4892 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4893 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4894 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4895 %} 4896 ins_pipe( pipe_slow ); 4897 %} 4898 4899 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4900 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4901 match(Set dst (ReplicateL src)); 4902 effect(TEMP dst, USE src, TEMP tmp); 4903 format %{ "movdl $dst,$src.lo\n\t" 4904 "movdl $tmp,$src.hi\n\t" 4905 "punpckldq $dst,$tmp\n\t" 4906 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4907 ins_encode %{ 4908 int vector_len = 2; 4909 __ movdl($dst$$XMMRegister, $src$$Register); 4910 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4911 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4912 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4913 %} 4914 ins_pipe( pipe_slow ); 4915 %} 4916 #endif // _LP64 4917 4918 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4919 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4920 match(Set dst (ReplicateL con)); 4921 format %{ "movq $dst,[$constantaddress]\n\t" 4922 "vpbroadcastq $dst,$dst\t! 
replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_imm_evex(vecZ dst, immL con) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "vpbroadcastq $dst,$dst\t! replicate8L" %}
  ins_encode %{
    int vector_len = 2;
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2L_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  // Fixed format text: the encoding emits a quadword broadcast (evpbroadcastq),
  // but the format said "vpbroadcastd" (doubleword).
  format %{ "vpbroadcastq $dst,$mem\t! replicate2L" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  // Fixed format text: "vpbroadcastd" -> "vpbroadcastq" to match evpbroadcastq below.
  format %{ "vpbroadcastq $dst,$mem\t! replicate4L" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8L_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "vpbroadcastq $dst,$mem\t! 
replicate8L" %} 4970 ins_encode %{ 4971 int vector_len = 2; 4972 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4973 %} 4974 ins_pipe( pipe_slow ); 4975 %} 4976 4977 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4978 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4979 match(Set dst (ReplicateL zero)); 4980 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4981 ins_encode %{ 4982 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4983 int vector_len = 2; 4984 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4985 %} 4986 ins_pipe( fpu_reg_reg ); 4987 %} 4988 4989 instruct Repl8F_evex(vecY dst, regF src) %{ 4990 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4991 match(Set dst (ReplicateF src)); 4992 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4993 ins_encode %{ 4994 int vector_len = 1; 4995 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4996 %} 4997 ins_pipe( pipe_slow ); 4998 %} 4999 5000 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 5001 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 5002 match(Set dst (ReplicateF (LoadF mem))); 5003 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 5004 ins_encode %{ 5005 int vector_len = 1; 5006 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 5007 %} 5008 ins_pipe( pipe_slow ); 5009 %} 5010 5011 instruct Repl16F_evex(vecZ dst, regF src) %{ 5012 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 5013 match(Set dst (ReplicateF src)); 5014 format %{ "vbroadcastss $dst,$src\t! 
replicate16F" %} 5015 ins_encode %{ 5016 int vector_len = 2; 5017 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 5018 %} 5019 ins_pipe( pipe_slow ); 5020 %} 5021 5022 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 5023 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 5024 match(Set dst (ReplicateF (LoadF mem))); 5025 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 5026 ins_encode %{ 5027 int vector_len = 2; 5028 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 5029 %} 5030 ins_pipe( pipe_slow ); 5031 %} 5032 5033 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 5034 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 5035 match(Set dst (ReplicateF zero)); 5036 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} 5037 ins_encode %{ 5038 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 5039 int vector_len = 2; 5040 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 5041 %} 5042 ins_pipe( fpu_reg_reg ); 5043 %} 5044 5045 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 5046 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 5047 match(Set dst (ReplicateF zero)); 5048 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 5049 ins_encode %{ 5050 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 5051 int vector_len = 2; 5052 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 5053 %} 5054 ins_pipe( fpu_reg_reg ); 5055 %} 5056 5057 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 5058 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 5059 match(Set dst (ReplicateF zero)); 5060 format %{ "vpxor $dst k0,$dst,$dst\t! 
replicate8F zero" %} 5061 ins_encode %{ 5062 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 5063 int vector_len = 2; 5064 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 5065 %} 5066 ins_pipe( fpu_reg_reg ); 5067 %} 5068 5069 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 5070 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 5071 match(Set dst (ReplicateF zero)); 5072 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 5073 ins_encode %{ 5074 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 5075 int vector_len = 2; 5076 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 5077 %} 5078 ins_pipe( fpu_reg_reg ); 5079 %} 5080 5081 instruct Repl4D_evex(vecY dst, regD src) %{ 5082 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 5083 match(Set dst (ReplicateD src)); 5084 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 5085 ins_encode %{ 5086 int vector_len = 1; 5087 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 5088 %} 5089 ins_pipe( pipe_slow ); 5090 %} 5091 5092 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 5093 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 5094 match(Set dst (ReplicateD (LoadD mem))); 5095 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 5096 ins_encode %{ 5097 int vector_len = 1; 5098 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 5099 %} 5100 ins_pipe( pipe_slow ); 5101 %} 5102 5103 instruct Repl8D_evex(vecZ dst, regD src) %{ 5104 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 5105 match(Set dst (ReplicateD src)); 5106 format %{ "vbroadcastsd $dst,$src\t! 
replicate8D" %} 5107 ins_encode %{ 5108 int vector_len = 2; 5109 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 5110 %} 5111 ins_pipe( pipe_slow ); 5112 %} 5113 5114 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 5115 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 5116 match(Set dst (ReplicateD (LoadD mem))); 5117 format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} 5118 ins_encode %{ 5119 int vector_len = 2; 5120 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 5121 %} 5122 ins_pipe( pipe_slow ); 5123 %} 5124 5125 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 5126 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 5127 match(Set dst (ReplicateD zero)); 5128 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 5129 ins_encode %{ 5130 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 5131 int vector_len = 2; 5132 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 5133 %} 5134 ins_pipe( fpu_reg_reg ); 5135 %} 5136 5137 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 5138 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 5139 match(Set dst (ReplicateD zero)); 5140 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 5141 ins_encode %{ 5142 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 5143 int vector_len = 2; 5144 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 5145 %} 5146 ins_pipe( fpu_reg_reg ); 5147 %} 5148 5149 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 5150 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 5151 match(Set dst (ReplicateD zero)); 5152 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! 
replicate8D zero" %}
  ins_encode %{
    // Use vpxor in place of vxorpd since EVEX has a constraint on dq for vxorpd: this is a 512-bit operation
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================
// Insert a scalar ($val) into lane $idx (a compile-time constant) of a
// vector, keeping the other lanes of $src. For vectors wider than 128 bits
// the target 128-bit lane is extracted, patched with pinsr*, and written
// back, since pinsr*/vpinsr* only address the low 128 bits.

instruct rvinsert8B(vecD dst, vecD src, rRegI val, immU3 idx) %{
  predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrb $dst,$val\t! Insert 8B" %}
  ins_encode %{
    // Copy only when the allocator assigned dst and src different registers.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrb($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert16B(vecX dst, vecX src, rRegI val, immU4 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrb $dst,$val\t! Insert 16B" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrb($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert16B_avx(vecX dst, vecX src, rRegI val, immU4 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "vmovdqu $dst,$src\n\t"
            "vpinsrb $dst,$dst,$val\t! Insert 16B" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vpinsrb($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert32B(vecY dst, vecY src, vecY tmp, rRegI val, immU5 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  format %{"vmovdqu $dst,$src\n\t"
           "vextracti128 $tmp,$src\n\t"
           "vpinsrb $tmp,$tmp,$val\n\t"
           "vinserti128 $dst,$tmp\t! Insert 32B" %}
  ins_encode %{
    // x_idx: byte position within the 128-bit lane; y_idx: which lane.
    uint x_idx = $idx$$constant & right_n_bits(4);
    uint y_idx = ($idx$$constant >> 4) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrb($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert64B(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU6 idx) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1);
  format %{ "evmovdquq $dst,$src\n\t"
            "vextracti64x4 $tmp,$src\n\t"
            "vextracti128 $tmp1,$tmp\n\t"
            "vpinsrb $tmp1,$tmp1,$val\n\t"
            "vinserti128 $tmp,$tmp,$tmp1\n\t"
            "vinserti64x4 $dst,$dst,$tmp\t! Insert 64B" %}
  ins_encode %{
    // x_idx: byte within 128-bit lane; y_idx: 128-bit lane within 256-bit
    // half; z_idx: which 256-bit half of the 512-bit register.
    uint x_idx = $idx$$constant & right_n_bits(4);
    uint y_idx = ($idx$$constant >> 4) & 1;
    uint z_idx = ($idx$$constant >> 5) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2);
    }
    __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx);
    __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx);
    __ vpinsrb($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert4S(vecD dst, vecD src, rRegI val, immU2 idx) %{
  predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrw $dst,$val\t! Insert 4S" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrw($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert8S(vecX dst, vecX src, rRegI val, immU3 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrw $dst,$val\t! 
Insert 8S" %}
  ins_encode %{
    // Copy only when the allocator assigned dst and src different registers.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrw($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert8S_avx(vecX dst, vecX src, rRegI val, immU3 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "vmovdqu $dst,$src\n\t"
            "vpinsrw $dst,$dst,$val\t! Insert 8S" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vpinsrw($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}


instruct rvinsert16S(vecY dst, vecY src, vecX tmp, rRegI val, immU4 idx) %{
  predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp);
  format %{ "vmovdqu $dst,$src\n\t"
            "vextracti128 $tmp,$src\n\t"
            "vpinsrw $tmp,$tmp,$val\n\t"
            "vinserti128 $dst,$dst,$tmp\t! Insert 16S" %}
  ins_encode %{
    // x_idx: short position within the 128-bit lane; y_idx: which lane.
    uint x_idx = $idx$$constant & right_n_bits(3);
    uint y_idx = ($idx$$constant >> 3) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrw($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert32S(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU5 idx) %{
  predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst, TEMP tmp, TEMP tmp1);
  format %{ "evmovdquq $dst,$src\n\t"
            "vextracti64x4 $tmp,$src\n\t"
            "vextracti128 $tmp1,$tmp\n\t"
            "vpinsrw $tmp1,$tmp1,$val\n\t"
            "vinserti128 $tmp,$tmp,$tmp1\n\t"
            "vinserti64x4 $dst,$dst,$tmp\t! Insert 32S" %}
  ins_encode %{
    // x_idx: short within 128-bit lane; y_idx: 128-bit lane within 256-bit
    // half; z_idx: which 256-bit half of the 512-bit register.
    uint x_idx = $idx$$constant & right_n_bits(3);
    uint y_idx = ($idx$$constant >> 3) & 1;
    uint z_idx = ($idx$$constant >> 4) & 1;

    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2);
    }
    __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx);
    __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx);
    __ vpinsrw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert2I(vecD dst, vecD src, rRegI val, immU1 idx) %{
  predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrd $dst,$val\t! Insert 2I" %}
  ins_encode %{
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
    __ pinsrd($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvinsert4I(vecX dst, vecX src, rRegI val, immU2 idx) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP dst);
  format %{ "movdqu $dst,$src\n\t"
            "pinsrd $dst,$val\t! 
Insert 4I" %} 5373 ins_encode %{ 5374 if ($dst$$XMMRegister != $src$$XMMRegister) { 5375 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5376 } 5377 __ pinsrd($dst$$XMMRegister, $val$$Register, $idx$$constant); 5378 %} 5379 ins_pipe( pipe_slow ); 5380 %} 5381 5382 instruct rvinsert4I_avx(vecX dst, vecX src, rRegI val, immU2 idx) %{ 5383 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 5384 match(Set dst (VectorInsert (Binary src val) idx)); 5385 effect(TEMP dst); 5386 format %{ "vmovdqu $dst,$src\n\t" 5387 "vpinsrd $dst,$val\t! Insert 4I" %} 5388 ins_encode %{ 5389 if ($dst$$XMMRegister != $src$$XMMRegister) { 5390 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 5391 } 5392 __ vpinsrd($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant); 5393 %} 5394 ins_pipe( pipe_slow ); 5395 %} 5396 5397 instruct rvinsert8I(vecY dst, vecY src, vecY tmp, rRegI val, immU3 idx) %{ 5398 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 5399 match(Set dst (VectorInsert (Binary src val) idx)); 5400 effect(TEMP dst, TEMP tmp); 5401 format %{ "vmovdqu $dst,$src\n\t" 5402 "vextracti128 $tmp,$src\n\t" 5403 "vpinsrd $tmp,$tmp,$val\n\t" 5404 "vinserti128 $dst,$dst,$tmp\t! 
Insert 8I" %} 5405 ins_encode %{ 5406 uint x_idx = $idx$$constant & right_n_bits(2); 5407 uint y_idx = ($idx$$constant >> 2) & 1; 5408 5409 if ($dst$$XMMRegister != $src$$XMMRegister) { 5410 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 5411 } 5412 __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); 5413 __ vpinsrd($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx); 5414 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); 5415 %} 5416 ins_pipe( pipe_slow ); 5417 %} 5418 5419 instruct rvinsert16I(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegI val, immU4 idx) %{ 5420 predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 5421 match(Set dst (VectorInsert (Binary src val) idx)); 5422 effect(TEMP dst, TEMP tmp, TEMP tmp1); 5423 format %{ "evmovdquq $dst,$src\n\t" 5424 "vextracti64x4 $tmp,$src\n\t" 5425 "vextracti128 $tmp,$tmp\n\t" 5426 "vpinsrd $tmp,$tmp,$val\n\t" 5427 "vinserti128 $tmp,$tmp,$tmp\n\t" 5428 "vinserti64x4 $dst,$dst,$tmp\t! 
Insert 16I" %} 5429 ins_encode %{ 5430 uint x_idx = $idx$$constant & right_n_bits(2); 5431 uint y_idx = ($idx$$constant >> 2) & 1; 5432 uint z_idx = ($idx$$constant >> 3) & 1; 5433 5434 if ($dst$$XMMRegister != $src$$XMMRegister) { 5435 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); 5436 } 5437 __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx); 5438 __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx); 5439 __ vpinsrd($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx); 5440 __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx); 5441 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx); 5442 %} 5443 ins_pipe( pipe_slow ); 5444 %} 5445 5446 instruct rvinsert1L(vecD dst, vecD src, rRegL val, immI0 idx) %{ 5447 predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 5448 match(Set dst (VectorInsert (Binary src val) idx)); 5449 effect(TEMP dst); 5450 format %{ "movdqu $dst,$src\n\t" 5451 "pinsrq $dst,$val\t! Insert 1L" %} 5452 ins_encode %{ 5453 if ($dst$$XMMRegister != $src$$XMMRegister) { 5454 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5455 } 5456 __ pinsrq($dst$$XMMRegister, $val$$Register, 0); 5457 %} 5458 ins_pipe( pipe_slow ); 5459 %} 5460 5461 instruct rvinsert2L(vecX dst, vecX src, rRegL val, immU1 idx) %{ 5462 predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 5463 match(Set dst (VectorInsert (Binary src val) idx)); 5464 effect(TEMP dst); 5465 format %{ "movdqu $dst,$src\n\t" 5466 "pinsrq $dst,$dst\t! 
Insert 2L" %} 5467 ins_encode %{ 5468 if ($dst$$XMMRegister != $src$$XMMRegister) { 5469 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5470 } 5471 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 5472 %} 5473 ins_pipe( pipe_slow ); 5474 %} 5475 5476 instruct rvinsert2L_avx(vecX dst, vecX src, rRegL val, immU1 idx) %{ 5477 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 5478 match(Set dst (VectorInsert (Binary src val) idx)); 5479 effect(TEMP dst); 5480 format %{ "vmovdqu $dst,$src\n\t" 5481 "vpinsrq $dst,$dst,$val\t! Insert 2L" %} 5482 ins_encode %{ 5483 if ($dst$$XMMRegister != $src$$XMMRegister) { 5484 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 5485 } 5486 __ vpinsrq($dst$$XMMRegister, $dst$$XMMRegister, $val$$Register, $idx$$constant); 5487 %} 5488 ins_pipe( pipe_slow ); 5489 %} 5490 5491 instruct rvinsert4L(vecY dst, vecY src, vecY tmp, rRegL val, immU2 idx) %{ 5492 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 5493 match(Set dst (VectorInsert (Binary src val) idx)); 5494 effect(TEMP dst, TEMP tmp); 5495 format %{ "vmovdqu $dst,$src\n\t" 5496 "vextracti128 $tmp,$src\n\t" 5497 "vpinsrq $tmp,$tmp,$val\n\t" 5498 "vinserti128 $dst,$dst,$tmp\t! 
Insert 4L" %} 5499 ins_encode %{ 5500 uint x_idx = $idx$$constant & 1; 5501 uint y_idx = ($idx$$constant >> 1) & 1; 5502 5503 if ($dst$$XMMRegister != $src$$XMMRegister) { 5504 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 5505 } 5506 __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); 5507 __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$Register, x_idx); 5508 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); 5509 %} 5510 ins_pipe( pipe_slow ); 5511 %} 5512 5513 instruct rvinsert8L(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, rRegL val, immU3 idx) %{ 5514 predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 5515 match(Set dst (VectorInsert (Binary src val) idx)); 5516 effect(TEMP dst, TEMP tmp, TEMP tmp1); 5517 format %{ "evmovdquq $dst,$src\n\t" 5518 "vextracti64x4 $tmp,$src\n\t" 5519 "vextracti128 $tmp,$tmp\n\t" 5520 "vpinsrq $tmp,$tmp,$val\n\t" 5521 "vinserti128 $tmp,$tmp,$tmp\n\t" 5522 "vinserti64x4 $dst,$dst,$tmp\t! 
Insert 8L" %} 5523 ins_encode %{ 5524 uint x_idx = $idx$$constant & 1; 5525 uint y_idx = ($idx$$constant >> 1) & 1; 5526 uint z_idx = ($idx$$constant >> 2) & 1; 5527 5528 if ($dst$$XMMRegister != $src$$XMMRegister) { 5529 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); 5530 } 5531 __ vextracti64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx); 5532 __ vextracti128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx); 5533 __ vpinsrq($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$Register, x_idx); 5534 __ vinserti128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx); 5535 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx); 5536 %} 5537 ins_pipe( pipe_slow ); 5538 %} 5539 5540 instruct rvinsert2F(vecD dst, vecD src, regF val, immU1 idx) %{ 5541 predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 5542 match(Set dst (VectorInsert (Binary src val) idx)); 5543 effect(TEMP dst); 5544 format %{ "movdqu $dst,$src\n\t" 5545 "insertps $dst,$dst,$val\t! Insert 2F" %} 5546 ins_encode %{ 5547 if ($dst$$XMMRegister != $src$$XMMRegister) { 5548 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5549 } 5550 __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); 5551 %} 5552 ins_pipe( pipe_slow ); 5553 %} 5554 5555 instruct rvinsert2F_avx(vecD dst, vecD src, regF val, immU1 idx) %{ 5556 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 5557 match(Set dst (VectorInsert (Binary src val) idx)); 5558 effect(TEMP dst); 5559 format %{ "movdqu $dst,$src\n\t" 5560 "insertps $dst,$dst,$val\t! 
Insert 2F" %} 5561 ins_encode %{ 5562 if ($dst$$XMMRegister != $src$$XMMRegister) { 5563 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5564 } 5565 __ vinsertps($dst$$XMMRegister, $dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); 5566 %} 5567 ins_pipe( pipe_slow ); 5568 %} 5569 5570 instruct rvinsert4F(vecX dst, vecX src, regF val, immU2 idx) %{ 5571 predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 5572 match(Set dst (VectorInsert (Binary src val) idx)); 5573 effect(TEMP dst); 5574 format %{ "movdqu $dst,$src\n\t" 5575 "insertps $dst,$dst,$val\t! Insert 4F" %} 5576 ins_encode %{ 5577 if ($dst$$XMMRegister != $src$$XMMRegister) { 5578 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5579 } 5580 __ insertps($dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); 5581 %} 5582 ins_pipe( pipe_slow ); 5583 %} 5584 5585 instruct rvinsert4F_avx(vecX dst, vecX src, regF val, immU2 idx) %{ 5586 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 5587 match(Set dst (VectorInsert (Binary src val) idx)); 5588 effect(TEMP dst); 5589 format %{ "vmovdqu $dst,$src\n\t" 5590 "vinsertps $dst,$dst,$val\t! Insert 4F" %} 5591 ins_encode %{ 5592 if ($dst$$XMMRegister != $src$$XMMRegister) { 5593 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 5594 } 5595 __ vinsertps($dst$$XMMRegister, $dst$$XMMRegister, $val$$XMMRegister, $idx$$constant); 5596 %} 5597 ins_pipe( pipe_slow ); 5598 %} 5599 5600 instruct rvinsert8F(vecY dst, vecY src, vecY tmp, regF val, immU3 idx) %{ 5601 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 5602 match(Set dst (VectorInsert (Binary src val) idx)); 5603 effect(TEMP dst, TEMP tmp); 5604 format %{ "vmovdqu $dst,$src\n\t" 5605 "vextractf128 $tmp,$src\n\t" 5606 "vinsertps $tmp,$tmp,$val\n\t" 5607 "vinsertf128 $dst,$dst,$tmp\t! 
Insert 8F" %} 5608 ins_encode %{ 5609 uint x_idx = $idx$$constant & right_n_bits(2); 5610 uint y_idx = ($idx$$constant >> 2) & 1; 5611 5612 if ($dst$$XMMRegister != $src$$XMMRegister) { 5613 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 5614 } 5615 __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); 5616 __ vinsertps($tmp$$XMMRegister, $tmp$$XMMRegister, $val$$XMMRegister, x_idx); 5617 __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); 5618 %} 5619 ins_pipe( pipe_slow ); 5620 %} 5621 5622 instruct rvinsert16F(vecZ dst, vecZ src, vecZ tmp, vecX tmp1, regF val, immU4 idx) %{ 5623 predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 5624 match(Set dst (VectorInsert (Binary src val) idx)); 5625 effect(TEMP dst, TEMP tmp, TEMP tmp1); 5626 format %{ "evmovdquq $dst,$src\n\t" 5627 "vextractf128 $tmp,$src\n\t" 5628 "vinsertps $tmp,$tmp,$val\n\t" 5629 "movsbl $dst,$dst\t! Insert 4I" %} 5630 ins_encode %{ 5631 uint x_idx = $idx$$constant & right_n_bits(2); 5632 uint y_idx = ($idx$$constant >> 2) & 1; 5633 uint z_idx = ($idx$$constant >> 3) & 1; 5634 5635 if ($dst$$XMMRegister != $src$$XMMRegister) { 5636 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); 5637 } 5638 __ vextractf64x4($tmp$$XMMRegister, $src$$XMMRegister, z_idx); 5639 __ vextractf128($tmp1$$XMMRegister, $tmp$$XMMRegister, y_idx); 5640 __ vinsertps($tmp1$$XMMRegister, $tmp1$$XMMRegister, $val$$XMMRegister, x_idx); 5641 __ vinsertf128($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, y_idx); 5642 __ vinsertf64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, z_idx); 5643 %} 5644 ins_pipe( pipe_slow ); 5645 %} 5646 5647 instruct rvinsert1D(vecD dst, vecD src, regD val, rRegL tmp, immI0 idx) %{ 5648 predicate(UseSSE > 3 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 5649 match(Set dst (VectorInsert (Binary src val) idx)); 5650 effect(TEMP dst, TEMP tmp); 5651 format %{ "movdqu $dst,$src\n\t" 5652 
"movq $tmp,$val\n\t" 5653 "pinsrq $dst,$tmp\t! Insert 1D" %} 5654 ins_encode %{ 5655 if ($dst$$XMMRegister != $src$$XMMRegister) { 5656 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5657 } 5658 __ movq($tmp$$Register, $val$$XMMRegister); 5659 __ pinsrq($dst$$XMMRegister, $tmp$$Register, 0); 5660 %} 5661 ins_pipe( pipe_slow ); 5662 %} 5663 5664 instruct rvinsert2D(vecX dst, vecX src, regD val, rRegL tmp, immU1 idx) %{ 5665 predicate(UseSSE > 3 && UseAVX == 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 5666 match(Set dst (VectorInsert (Binary src val) idx)); 5667 effect(TEMP dst, TEMP tmp); 5668 format %{ "movdqu $dst,$src\n\t" 5669 "movq $dst,$src\n\t" 5670 "pinsrq $dst,$dst\t! Insert 2D" %} 5671 ins_encode %{ 5672 if ($dst$$XMMRegister != $src$$XMMRegister) { 5673 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 5674 } 5675 __ movq($tmp$$Register, $val$$XMMRegister); 5676 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 5677 %} 5678 ins_pipe( pipe_slow ); 5679 %} 5680 5681 instruct rvinsert2D_avx(vecX dst, vecX src, regD val, rRegL tmp, immU1 idx) %{ 5682 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 5683 match(Set dst (VectorInsert (Binary src val) idx)); 5684 effect(TEMP dst, TEMP tmp); 5685 format %{ "vmovdqu $dst,$src\n\t" 5686 "movq $tmp,$val\n\t" 5687 "vpinsrq $dst,$dst,$tmp\t! 
Insert 2D" %} 5688 ins_encode %{ 5689 if ($dst$$XMMRegister != $src$$XMMRegister) { 5690 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 5691 } 5692 __ movq($tmp$$Register, $val$$XMMRegister); 5693 __ vpinsrq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$Register, $idx$$constant); 5694 %} 5695 ins_pipe( pipe_slow ); 5696 %} 5697 5698 instruct rvinsert4D(vecY dst, vecY src, vecY tmp, regD val, rRegL tmp1, immU2 idx) %{ 5699 predicate(UseAVX > 0 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 5700 match(Set dst (VectorInsert (Binary src val) idx)); 5701 effect(TEMP dst, TEMP tmp, TEMP tmp1); 5702 format %{ "vmovdqu $dst,$src\n\t" 5703 "vextracti128 $tmp,$src\n\t" 5704 "movq $tmp1,$val\n\t" 5705 "vpinsrq $tmp,$tmp,$tmp1\n\t" 5706 "vinserti128 $dst,$dst,$tmp\t! Insert 4D" %} 5707 ins_encode %{ 5708 uint x_idx = $idx$$constant & 1; 5709 uint y_idx = ($idx$$constant >> 1) & 1; 5710 5711 if ($dst$$XMMRegister != $src$$XMMRegister) { 5712 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 5713 } 5714 __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, y_idx); 5715 __ movq($tmp1$$Register, $val$$XMMRegister); 5716 __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$Register, x_idx); 5717 __ vinsertf128($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, y_idx); 5718 %} 5719 ins_pipe( pipe_slow ); 5720 %} 5721 5722 instruct rvinsert8D(vecZ dst, vecZ src, vecZ tmp, vecY tmp2, regD val, rRegL tmp1, immU3 idx) %{ 5723 predicate(UseAVX > 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 5724 match(Set dst (VectorInsert (Binary src val) idx)); 5725 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2); 5726 format %{ "evmovdquq $dst,$src\n\t" 5727 "vextractf64x4 $tmp,$src\n\t" 5728 "vextractf128 $tmp,$tmp\n\t" 5729 "movq $tmp1,$val\n\t" 5730 "vpinsrq $tmp,$tmp,$val\n\t" 5731 "vinsertf128 $tmp,$tmp,$tmp\n\t" 5732 "vinsertf64x4 $dst,$dst,$tmp\t! 
Insert 8D" %} 5733 ins_encode %{ 5734 uint x_idx = $idx$$constant & 1; 5735 uint y_idx = ($idx$$constant >> 1) & 1; 5736 uint z_idx = ($idx$$constant >> 2) & 1; 5737 5738 if ($dst$$XMMRegister != $src$$XMMRegister) { 5739 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, 2); 5740 } 5741 __ vextractf64x4($tmp2$$XMMRegister, $src$$XMMRegister, z_idx); 5742 __ vextractf128($tmp$$XMMRegister, $tmp2$$XMMRegister, y_idx); 5743 __ movq($tmp1$$Register, $val$$XMMRegister); 5744 __ vpinsrq($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$Register, x_idx); 5745 __ vinsertf128($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, y_idx); 5746 __ vinsertf64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, z_idx); 5747 %} 5748 ins_pipe( pipe_slow ); 5749 %} 5750 5751 // ====================REDUCTION ARITHMETIC======================================= 5752 5753 instruct rsadd8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 5754 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 5755 match(Set dst (AddReductionVI src1 src2)); 5756 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5757 format %{ 5758 "pshufd $tmp,$src2,0x1\n\t" 5759 "paddb $tmp,$src2\n\t" 5760 "movzbl $dst,$src1\n\t" 5761 "pextrb $tmp2,$tmp, 0x0\n\t" 5762 "addl $dst,$tmp2\n\t" 5763 "pextrb $tmp2,$tmp, 0x1\n\t" 5764 "addl $dst,$tmp2\n\t" 5765 "pextrb $tmp2,$tmp, 0x2\n\t" 5766 "addl $dst,$tmp2\n\t" 5767 "pextrb $tmp2,$tmp, 0x3\n\t" 5768 "addl $dst,$tmp2\n\t" 5769 "movsbl $dst,$dst\t! 
add reduction8B" %}
  ins_encode %{
    // Fold the upper 4 bytes onto the lower 4, then extract/add byte lanes 0..3.
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ paddb($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ addl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ addl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2);
    __ addl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ addl($dst$$Register, $tmp2$$Register);
    // Truncate the accumulated sum back to a signed byte value.
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 16 bytes of $src2 plus scalar $src1; result sign-extended into $dst (SSE4.1+).
instruct rsadd16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "paddb $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "paddb $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "addl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "addl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "addl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "addl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t!
add reduction32B" %}
  ins_encode %{
    int vector_len = 0;
    // Fold 256->128->64->32 bits, then extract/add the 4 surviving byte lanes.
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ addl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ addl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ addl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3);
    __ addl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 64 bytes of $src2 plus scalar $src1 (AVX-512); the final 4 byte
// sums are pulled out with one movdl and peeled off by repeated 8-bit shifts.
instruct rvadd64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddb $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddb $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddb $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddb $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "addl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "addl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "addl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "addl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t!
add reduction4S" %} 5924 ins_encode %{ 5925 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5926 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5927 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5928 __ movzwl($dst$$Register, $src1$$Register); 5929 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5930 __ addw($dst$$Register, $tmp2$$Register); 5931 __ movswl($dst$$Register, $dst$$Register); 5932 %} 5933 ins_pipe( pipe_slow ); 5934 %} 5935 5936 instruct rvadd4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 5937 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5938 match(Set dst (AddReductionVI src1 src2)); 5939 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5940 format %{ "vphaddw $tmp,$src2,$src2\n\t" 5941 "vphaddw $tmp,$tmp,$tmp\n\t" 5942 "movzwl $dst,$src1\n\t" 5943 "pextrw $tmp2,$tmp, 0x0\n\t" 5944 "addw $dst,$tmp2\n\t" 5945 "movswl $dst,$dst\t! add reduction4S" %} 5946 ins_encode %{ 5947 int vector_len = 0; 5948 __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 5949 __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 5950 __ movzwl($dst$$Register, $src1$$Register); 5951 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5952 __ addw($dst$$Register, $tmp2$$Register); 5953 __ movswl($dst$$Register, $dst$$Register); 5954 %} 5955 ins_pipe( pipe_slow ); 5956 %} 5957 5958 instruct rsadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ 5959 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5960 match(Set dst (AddReductionVI src1 src2)); 5961 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5962 format %{ 5963 "movdqu $tmp,$src2\n\t" 5964 "phaddw $tmp,$tmp\n\t" 5965 "phaddw $tmp,$tmp\n\t" 5966 "phaddw $tmp,$tmp\n\t" 5967 "movzwl $dst,$src1\n\t" 5968 "pextrw $tmp2,$tmp, 0x0\n\t" 5969 "addw $dst,$tmp2\n\t" 5970 "movswl $dst,$dst\t! 
add reduction8S" %} 5971 ins_encode %{ 5972 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 5973 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5974 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5975 __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister); 5976 __ movzwl($dst$$Register, $src1$$Register); 5977 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 5978 __ addw($dst$$Register, $tmp2$$Register); 5979 __ movswl($dst$$Register, $dst$$Register); 5980 %} 5981 ins_pipe( pipe_slow ); 5982 %} 5983 5984 instruct rvadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{ 5985 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 5986 match(Set dst (AddReductionVI src1 src2)); 5987 effect(TEMP tmp, TEMP tmp2, TEMP dst); 5988 format %{ "vphaddw $tmp,$src2,$src2\n\t" 5989 "vphaddw $tmp,$tmp,$tmp\n\t" 5990 "vphaddw $tmp,$tmp,$tmp\n\t" 5991 "movzwl $dst,$src1\n\t" 5992 "pextrw $tmp2,$tmp, 0x0\n\t" 5993 "addw $dst,$tmp2\n\t" 5994 "movswl $dst,$dst\t! 
add reduction8S" %}
  ins_encode %{
    // Three horizontal adds collapse 8 shorts into lane 0.
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddw($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ addw($dst$$Register, $tmp2$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AVX variant of the 8-short add reduction.
instruct rvadd8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "vphaddw $tmp,$src2,$src2\n\t"
            "vphaddw $tmp,$tmp,$tmp\n\t"
            "vphaddw $tmp,$tmp,$tmp\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp2,$tmp, 0x0\n\t"
            "addw $dst,$tmp2\n\t"
            "movswl $dst,$dst\t! add reduction8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ addw($dst$$Register, $tmp2$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 16 shorts of $src2 plus scalar $src1 (AVX2, 256-bit vphaddw;
// the vpermq reorders the 128-bit lanes so the folds line up).
instruct rvadd16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "vphaddw $tmp,$src2,$src2\n\t"
            "vphaddw $tmp,$tmp,$tmp\n\t"
            "vphaddw $tmp,$tmp,$tmp\n\t"
            "vphaddw $tmp,$tmp,$tmp\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp2,$tmp, 0x0\n\t"
            "addw $dst,$tmp2\n\t"
            "movswl $dst,$dst\t! add reduction16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddw($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpermq($tmp$$XMMRegister, $tmp$$XMMRegister, 0xD8, vector_len);
    __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vphaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ addw($dst$$Register, $tmp2$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 32 shorts of $src2 plus scalar $src1 (AVX-512).
// Fix vs. previous revision (debug format string only — emitted code unchanged):
// the shift really is by 16 bits (shrl($tmp3, 16) below), so the format now says
// 0x10 rather than 0x16, and the emitted movzwl of $src1 is listed as well.
instruct rvadd32S_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddw $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddw $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddw $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddw $tmp,$tmp,$tmp2\n\t"
            "movdl $tmp3,$tmp\n\t"
            "movzwl $dst,$src1\n\t"
            "addw $dst,$tmp3\n\t"
            "shrl $tmp3,0x10\n\t"
            "addw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t!
add reduction32S" %}
  ins_encode %{
    // Fold 512->256->128->64->32 bits; final two short sums share one dword.
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp3$$Register, $tmp$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ addw($dst$$Register, $tmp3$$Register);
    // Second short sum sits in the high 16 bits of $tmp3.
    __ shrl($tmp3$$Register, 16);
    __ addw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 2 ints of $src2 plus scalar $src1 (pure SSE path, no AVX).
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp2, TEMP tmp);
  format %{ "movdqu $tmp2,$src2\n\t"
            "phaddd $tmp2,$tmp2\n\t"
            "movd $tmp,$src1\n\t"
            "paddd $tmp,$tmp2\n\t"
            "movd $dst,$tmp\t! add reduction2I" %}
  ins_encode %{
    __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-only (no AVX2/EVEX) variant of the 2-int add reduction.
instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of the 2-int add reduction (shuffle+add instead of vphaddd).
instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t!
add reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 4 ints of $src2 plus scalar $src1 (pure SSE path, no AVX).
instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 2 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "movdqu $tmp,$src2\n\t"
            "phaddd $tmp,$tmp\n\t"
            "phaddd $tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "paddd $tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-only variant of the 4-int add reduction.
instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of the 4-int add reduction (shuffle+add folds).
instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddd $tmp,$src2,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-only variant of the 8-int add reduction (256-bit vphaddd).
instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(VM_Version::supports_avxonly() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vphaddd $tmp,$src2,$src2\n\t"
            "vphaddd $tmp,$tmp,$tmp2\n\t"
            "vextracti128_high $tmp2,$tmp\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp2,$tmp\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
    // NOTE(review): $tmp2 is read here before any write, so its upper sums are
    // garbage; this appears benign because only lane 0 of the final result is
    // consumed — confirm against the upstream x86.ad before changing.
    __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of the 8-int add reduction (extract high half, then shuffle+add folds).
instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! add reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 16 ints of $src2 plus scalar $src1 (AVX-512).
instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpaddd $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpaddd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpaddd $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpaddd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t!
add reduction16I" %}
  ins_encode %{
    // Fix vs. previous revision (debug format string only — emitted code
    // unchanged): the tail of the format above said "mul reduction16I", a
    // copy-paste slip — this instruct matches AddReductionVI, an ADD reduction.
    // Fold 512->256->128->64->32 bits, then add scalar $src1.
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// Add-reduce 2 longs of $src2 plus scalar $src1 (EVEX).
instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpaddq $tmp,$src2,$tmp2\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpaddq $tmp2,$tmp,$tmp2\n\t"
            "movdq $dst,$tmp2\t! add reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpaddq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 4 longs of $src2 plus scalar $src1 (EVEX).
instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpaddq $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! add reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 8 longs of $src2 plus scalar $src1 (AVX-512).
instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpaddq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpaddq $tmp2,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t!
add reduction8L" %}
  ins_encode %{
    // Fold 512->256->128->64 bits, then add scalar $src1 in an XMM register.
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// Add-reduce 2 floats of $src2 into accumulator $dst (SSE; strict fp order).
instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX variant of the 2-float add reduction.
instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction2F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 4 floats of $src2 into accumulator $dst (SSE; lane-by-lane, strict fp order).
instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "addss $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "addss $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "addss $dst,$tmp\t! add reduction4F" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ addss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX variant of the 4-float add reduction.
// Fix vs. previous revision (debug format string only — emitted code unchanged):
// the first format line read "vaddss $dst,dst,$src2" — the second operand was
// missing its '$', so the ADL operand substitution would print a literal "dst".
instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t!
add reduction4F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 8 floats of $src2 into accumulator $dst (AVX): low 4 lanes first,
// then the extracted high 128-bit half, lane by lane (preserves strict fp order).
instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction8F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 16 floats of $src2 into accumulator $dst (AVX-512): each of the
// four 128-bit quarters is extracted and summed lane by lane, in order.
instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd
 $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    // Quarter 0 (in-register), then quarters 1..3 via vextractf32x4.
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Add-reduce 2 doubles of $src2 into accumulator $dst (SSE; strict fp order).
instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX variant of the 2-double add reduction.
instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vaddsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vaddsd $dst,$dst,$tmp\t!
add reduction2D" %} 6555 ins_encode %{ 6556 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6557 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6558 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6559 %} 6560 ins_pipe( pipe_slow ); 6561 %} 6562 6563 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 6564 predicate(UseAVX > 0); 6565 match(Set dst (AddReductionVD dst src2)); 6566 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6567 format %{ "vaddsd $dst,$dst,$src2\n\t" 6568 "pshufd $tmp,$src2,0xE\n\t" 6569 "vaddsd $dst,$dst,$tmp\n\t" 6570 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6571 "vaddsd $dst,$dst,$tmp2\n\t" 6572 "pshufd $tmp,$tmp2,0xE\n\t" 6573 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 6574 ins_encode %{ 6575 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6576 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6577 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6578 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6579 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6580 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6581 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6582 %} 6583 ins_pipe( pipe_slow ); 6584 %} 6585 6586 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 6587 predicate(UseAVX > 2); 6588 match(Set dst (AddReductionVD dst src2)); 6589 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6590 format %{ "vaddsd $dst,$dst,$src2\n\t" 6591 "pshufd $tmp,$src2,0xE\n\t" 6592 "vaddsd $dst,$dst,$tmp\n\t" 6593 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6594 "vaddsd $dst,$dst,$tmp2\n\t" 6595 "pshufd $tmp,$tmp2,0xE\n\t" 6596 "vaddsd $dst,$dst,$tmp\n\t" 6597 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6598 "vaddsd $dst,$dst,$tmp2\n\t" 6599 "pshufd $tmp,$tmp2,0xE\n\t" 6600 "vaddsd $dst,$dst,$tmp\n\t" 6601 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6602 "vaddsd $dst,$dst,$tmp2\n\t" 6603 "pshufd 
$tmp,$tmp2,0xE\n\t" 6604 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 6605 ins_encode %{ 6606 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6607 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6608 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6609 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6610 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6611 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6612 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6613 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6614 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6615 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6616 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6617 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6618 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6619 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6620 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6621 %} 6622 ins_pipe( pipe_slow ); 6623 %} 6624 6625 instruct rssub2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 6626 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6627 match(Set dst (SubReductionVFP dst src2)); 6628 effect(TEMP dst, TEMP tmp); 6629 format %{ "subss $dst,$src2\n\t" 6630 "pshufd $tmp,$src2,0x01\n\t" 6631 "subss $dst,$dst,$tmp\t! 
sub reduction2F" %} 6632 ins_encode %{ 6633 __ subss($dst$$XMMRegister, $src2$$XMMRegister); 6634 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6635 __ subss($dst$$XMMRegister, $tmp$$XMMRegister); 6636 %} 6637 ins_pipe( pipe_slow ); 6638 %} 6639 6640 instruct rvsub2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 6641 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6642 match(Set dst (SubReductionVFP dst src2)); 6643 effect(TEMP dst, TEMP tmp); 6644 format %{ "vsubss $dst,$dst,$src2\n\t" 6645 "pshufd $tmp,$src2,0x01\n\t" 6646 "vsubss $dst,$dst,$tmp\t! sub reduction2F" %} 6647 ins_encode %{ 6648 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6649 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6650 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6651 %} 6652 ins_pipe( pipe_slow ); 6653 %} 6654 6655 instruct rssub4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6656 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6657 match(Set dst (SubReductionVFP dst src2)); 6658 effect(TEMP dst, TEMP tmp); 6659 format %{ "subss $dst,$src2\n\t" 6660 "pshufd $tmp,$src2,0x01\n\t" 6661 "subss $dst,$tmp\n\t" 6662 "pshufd $tmp,$src2,0x02\n\t" 6663 "subss $dst,$tmp\n\t" 6664 "pshufd $tmp,$src2,0x03\n\t" 6665 "subss $dst,$tmp\t! 
sub reduction4F" %} 6666 ins_encode %{ 6667 __ subss($dst$$XMMRegister, $src2$$XMMRegister); 6668 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6669 __ subss($dst$$XMMRegister, $tmp$$XMMRegister); 6670 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6671 __ subss($dst$$XMMRegister, $tmp$$XMMRegister); 6672 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6673 __ subss($dst$$XMMRegister, $tmp$$XMMRegister); 6674 %} 6675 ins_pipe( pipe_slow ); 6676 %} 6677 6678 instruct rvsub4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 6679 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6680 match(Set dst (SubReductionVFP dst src2)); 6681 effect(TEMP tmp, TEMP dst); 6682 format %{ "vsubss $dst,dst,$src2\n\t" 6683 "pshufd $tmp,$src2,0x01\n\t" 6684 "vsubss $dst,$dst,$tmp\n\t" 6685 "pshufd $tmp,$src2,0x02\n\t" 6686 "vsubss $dst,$dst,$tmp\n\t" 6687 "pshufd $tmp,$src2,0x03\n\t" 6688 "vsubss $dst,$dst,$tmp\t! sub reduction4F" %} 6689 ins_encode %{ 6690 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6691 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6692 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6693 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6694 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6695 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6696 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6697 %} 6698 ins_pipe( pipe_slow ); 6699 %} 6700 6701 instruct rsub8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 6702 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6703 match(Set dst (SubReductionVFP dst src2)); 6704 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6705 format %{ "vsubss $dst,$dst,$src2\n\t" 6706 "pshufd $tmp,$src2,0x01\n\t" 6707 "vsubss $dst,$dst,$tmp\n\t" 6708 "pshufd $tmp,$src2,0x02\n\t" 6709 "vsubss $dst,$dst,$tmp\n\t" 6710 
"pshufd $tmp,$src2,0x03\n\t" 6711 "vsubss $dst,$dst,$tmp\n\t" 6712 "vextractf128_high $tmp2,$src2\n\t" 6713 "vsubss $dst,$dst,$tmp2\n\t" 6714 "pshufd $tmp,$tmp2,0x01\n\t" 6715 "vsubss $dst,$dst,$tmp\n\t" 6716 "pshufd $tmp,$tmp2,0x02\n\t" 6717 "vsubss $dst,$dst,$tmp\n\t" 6718 "pshufd $tmp,$tmp2,0x03\n\t" 6719 "vsubss $dst,$dst,$tmp\t! sub reduction8F" %} 6720 ins_encode %{ 6721 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6722 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6723 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6724 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6725 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6726 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6727 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6728 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 6729 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6730 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6731 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6732 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6733 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6734 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6735 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6736 %} 6737 ins_pipe( pipe_slow ); 6738 %} 6739 6740 instruct rsub16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 6741 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 6742 match(Set dst (SubReductionVFP dst src2)); 6743 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6744 format %{ "vsubss $dst,$dst,$src2\n\t" 6745 "pshufd $tmp,$src2,0x01\n\t" 6746 "vsubss $dst,$dst,$tmp\n\t" 6747 "pshufd $tmp,$src2,0x02\n\t" 6748 "vsubss $dst,$dst,$tmp\n\t" 6749 "pshufd $tmp,$src2,0x03\n\t" 6750 "vsubss $dst,$dst,$tmp\n\t" 6751 "vextractf32x4 
$tmp2,$src2,0x1\n\t" 6752 "vsubss $dst,$dst,$tmp2\n\t" 6753 "pshufd $tmp,$tmp2,0x01\n\t" 6754 "vsubss $dst,$dst,$tmp\n\t" 6755 "pshufd $tmp,$tmp2,0x02\n\t" 6756 "vsubss $dst,$dst,$tmp\n\t" 6757 "pshufd $tmp,$tmp2,0x03\n\t" 6758 "vsubss $dst,$dst,$tmp\n\t" 6759 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6760 "vsubss $dst,$dst,$tmp2\n\t" 6761 "pshufd $tmp,$tmp2,0x01\n\t" 6762 "vsubss $dst,$dst,$tmp\n\t" 6763 "pshufd $tmp,$tmp2,0x02\n\t" 6764 "vsubss $dst,$dst,$tmp\n\t" 6765 "pshufd $tmp,$tmp2,0x03\n\t" 6766 "vsubss $dst,$dst,$tmp\n\t" 6767 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6768 "vsubss $dst,$dst,$tmp2\n\t" 6769 "pshufd $tmp,$tmp2,0x01\n\t" 6770 "vsubss $dst,$dst,$tmp\n\t" 6771 "pshufd $tmp,$tmp2,0x02\n\t" 6772 "vsubss $dst,$dst,$tmp\n\t" 6773 "pshufd $tmp,$tmp2,0x03\n\t" 6774 "vsubss $dst,$dst,$tmp\t! sub reduction16F" %} 6775 ins_encode %{ 6776 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6777 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 6778 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6779 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 6780 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6781 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 6782 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6783 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6784 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6785 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6786 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6787 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6788 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6789 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6790 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6791 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6792 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, 
$tmp2$$XMMRegister); 6793 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6794 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6795 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6796 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6797 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6798 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6799 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6800 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6801 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 6802 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6803 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 6804 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6805 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 6806 __ vsubss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6807 %} 6808 ins_pipe( pipe_slow ); 6809 %} 6810 6811 instruct rssub2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6812 predicate(UseSSE >= 1 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 6813 match(Set dst (SubReductionVFP dst src2)); 6814 effect(TEMP tmp, TEMP dst); 6815 format %{ "subsd $dst,$src2\n\t" 6816 "pshufd $tmp,$src2,0xE\n\t" 6817 "subsd $dst,$tmp\t! sub reduction2D" %} 6818 ins_encode %{ 6819 __ subsd($dst$$XMMRegister, $src2$$XMMRegister); 6820 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6821 __ subsd($dst$$XMMRegister, $tmp$$XMMRegister); 6822 %} 6823 ins_pipe( pipe_slow ); 6824 %} 6825 6826 instruct rvsub2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 6827 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 6828 match(Set dst (SubReductionVFP dst src2)); 6829 effect(TEMP tmp, TEMP dst); 6830 format %{ "vsubsd $dst,$dst,$src2\n\t" 6831 "pshufd $tmp,$src2,0xE\n\t" 6832 "vsubsd $dst,$dst,$tmp\t! 
sub reduction2D" %} 6833 ins_encode %{ 6834 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6835 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6836 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6837 %} 6838 ins_pipe( pipe_slow ); 6839 %} 6840 6841 instruct rvsub4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 6842 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 6843 match(Set dst (SubReductionVFP dst src2)); 6844 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6845 format %{ "vsubsd $dst,$dst,$src2\n\t" 6846 "pshufd $tmp,$src2,0xE\n\t" 6847 "vsubsd $dst,$dst,$tmp\n\t" 6848 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6849 "vsubsd $dst,$dst,$tmp2\n\t" 6850 "pshufd $tmp,$tmp2,0xE\n\t" 6851 "vsubsd $dst,$dst,$tmp\t! sub reduction4D" %} 6852 ins_encode %{ 6853 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6854 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6855 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6856 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6857 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6858 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6859 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6860 %} 6861 ins_pipe( pipe_slow ); 6862 %} 6863 6864 instruct rvsub8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 6865 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 6866 match(Set dst (SubReductionVFP dst src2)); 6867 effect(TEMP tmp, TEMP dst, TEMP tmp2); 6868 format %{ "vsubsd $dst,$dst,$src2\n\t" 6869 "pshufd $tmp,$src2,0xE\n\t" 6870 "vsubsd $dst,$dst,$tmp\n\t" 6871 "vextractf32x4 $tmp2,$src2,0x1\n\t" 6872 "vsubsd $dst,$dst,$tmp2\n\t" 6873 "pshufd $tmp,$tmp2,0xE\n\t" 6874 "vsubsd $dst,$dst,$tmp\n\t" 6875 "vextractf32x4 $tmp2,$src2,0x2\n\t" 6876 "vsubsd $dst,$dst,$tmp2\n\t" 6877 "pshufd 
$tmp,$tmp2,0xE\n\t" 6878 "vsubsd $dst,$dst,$tmp\n\t" 6879 "vextractf32x4 $tmp2,$src2,0x3\n\t" 6880 "vsubsd $dst,$dst,$tmp2\n\t" 6881 "pshufd $tmp,$tmp2,0xE\n\t" 6882 "vsubsd $dst,$dst,$tmp\t! sub reduction8D" %} 6883 ins_encode %{ 6884 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 6885 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 6886 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6887 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 6888 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6889 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6890 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6891 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 6892 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6893 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6894 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6895 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 6896 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 6897 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 6898 __ vsubsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 6899 %} 6900 ins_pipe( pipe_slow ); 6901 %} 6902 6903 instruct rsmul8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 6904 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6905 match(Set dst (MulReductionVI src1 src2)); 6906 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 6907 format %{ "pmovsxbw $tmp,$src2\n\t" 6908 "pshufd $tmp1,$tmp,0xE\n\t" 6909 "pmullw $tmp,$tmp1\n\t" 6910 "pshufd $tmp1,$tmp,0x1\n\t" 6911 "pmullw $tmp,$tmp1\n\t" 6912 "pextrw $tmp2,$tmp, 0x1\n\t" 6913 "pextrw $tmp3,$tmp, 0x0\n\t" 6914 "imul $tmp2,$tmp3 \n\t" 6915 "movsbl $dst,$src1\n\t" 6916 "imull $dst,$tmp2\n\t" 6917 "movsbl $dst,$dst\t! 
mul reduction8B" %} 6918 ins_encode %{ 6919 __ pmovsxbw($tmp$$XMMRegister, $src2$$XMMRegister); 6920 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 6921 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6922 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 6923 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6924 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 6925 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6926 __ imull($tmp2$$Register, $tmp3$$Register); 6927 __ movsbl($dst$$Register, $src1$$Register); 6928 __ imull($dst$$Register, $tmp2$$Register); 6929 __ movsbl($dst$$Register, $dst$$Register); 6930 %} 6931 ins_pipe( pipe_slow ); 6932 %} 6933 6934 instruct rsmul16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 6935 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6936 match(Set dst (MulReductionVI src1 src2)); 6937 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 6938 format %{ "pmovsxbw $tmp,$src2\n\t" 6939 "pshufd $tmp1,$src2,0xEE\n\t" 6940 "pmovsxbw $tmp1,$tmp1\n\t" 6941 "pmullw $tmp,$tmp1\n\t" 6942 "pshufd $tmp1,$tmp,0xE\n\t" 6943 "pmullw $tmp,$tmp1\n\t" 6944 "pshufd $tmp1,$tmp,0x1\n\t" 6945 "pmullw $tmp,$tmp1\n\t" 6946 "pextrw $tmp2,$tmp, 0x1\n\t" 6947 "pextrw $tmp3,$tmp, 0x0\n\t" 6948 "imull $tmp2,$tmp3 \n\t" 6949 "movsbl $dst,$src1\n\t" 6950 "imull $dst,$tmp2\n\t" 6951 "movsbl $dst,$dst\t! 
mul reduction16B" %} 6952 ins_encode %{ 6953 int vector_len = 0; 6954 __ pmovsxbw($tmp$$XMMRegister, $src2$$XMMRegister); 6955 __ pshufd($tmp1$$XMMRegister, $src2$$XMMRegister, 0xEE); 6956 __ pmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister); 6957 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6958 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 6959 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6960 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 6961 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 6962 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 6963 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 6964 __ imull($tmp2$$Register, $tmp3$$Register); 6965 __ movsbl($dst$$Register, $src1$$Register); 6966 __ imull($dst$$Register, $tmp2$$Register); 6967 __ movsbl($dst$$Register, $dst$$Register); 6968 %} 6969 ins_pipe( pipe_slow ); 6970 %} 6971 6972 instruct rvmul32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 6973 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 6974 match(Set dst (MulReductionVI src1 src2)); 6975 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 6976 format %{ "vextracti128_high $tmp,$src2\n\t" 6977 "pmovsxbw $tmp,$tmp\n\t" 6978 "pmovsxbw $tmp1,$src2\n\t" 6979 "vpmullw $tmp,$tmp,$tmp1\n\t" 6980 "vextracti128_high $tmp1,$tmp\n\t" 6981 "vpmullw $tmp,$tmp,$tmp1\n\t" 6982 "pshufd $tmp1,$tmp,0xE\n\t" 6983 "vpmullw $tmp,$tmp,$tmp1\n\t" 6984 "pshufd $tmp1,$tmp,0x1\n\t" 6985 "vpmullw $tmp,$tmp,$tmp1\n\t" 6986 "pextrw $tmp2,$tmp, 0x1\n\t" 6987 "pextrw $tmp3,$tmp, 0x0\n\t" 6988 "imull $tmp2,$tmp3 \n\t" 6989 "movsbl $dst,$src1\n\t" 6990 "imull $dst,$tmp2\n\t" 6991 "movsbl $dst,$dst\t! 
mul reduction32B" %} 6992 ins_encode %{ 6993 int vector_len = 1; 6994 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 6995 __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 6996 __ vpmovsxbw($tmp1$$XMMRegister, $src2$$XMMRegister, vector_len); 6997 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 6998 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 6999 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7000 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 7001 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7002 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7003 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7004 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7005 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7006 __ imull($tmp2$$Register, $tmp3$$Register); 7007 __ movsbl($dst$$Register, $src1$$Register); 7008 __ imull($dst$$Register, $tmp2$$Register); 7009 __ movsbl($dst$$Register, $dst$$Register); 7010 %} 7011 ins_pipe( pipe_slow ); 7012 %} 7013 7014 instruct rvmul64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 7015 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 7016 match(Set dst (MulReductionVI src1 src2)); 7017 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 7018 format %{ "vextracti64x4_high $tmp,$src2\n\t" 7019 "vpmovsxbw $tmp,$tmp\n\t" 7020 "vpmovsxbw $tmp1,$src2\n\t" 7021 "vpmullw $tmp,$tmp,$tmp1\n\t" 7022 "vextracti64x4_high $tmp1,$tmp\n\t" 7023 "vpmullw $tmp,$tmp,$tmp1\n\t" 7024 "vextracti128_high $tmp1,$tmp\n\t" 7025 "vpmullw $tmp,$tmp,$tmp1\n\t" 7026 "pshufd $tmp1,$tmp,0xE\n\t" 7027 "vpmullw $tmp,$tmp,$tmp1\n\t" 7028 "pshufd $tmp1,$tmp,0x1\n\t" 7029 "vpmullw $tmp,$tmp,$tmp1\n\t" 7030 "pextrw $tmp2,$tmp, 0x1\n\t" 7031 "pextrw $tmp3,$tmp, 
0x0\n\t" 7032 "imull $tmp2,$tmp3 \n\t" 7033 "movsbl $dst,$src1\n\t" 7034 "imull $dst,$tmp2\n\t" 7035 "movsbl $dst,$dst\t! mul reduction64B" %} 7036 ins_encode %{ 7037 int vector_len = 2; 7038 __ vextracti64x4_high($tmp$$XMMRegister, $src2$$XMMRegister); 7039 __ vpmovsxbw($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 7040 __ vpmovsxbw($tmp1$$XMMRegister, $src2$$XMMRegister, vector_len); 7041 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 7042 __ vextracti64x4_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 7043 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 1); 7044 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 7045 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7046 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 7047 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7048 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7049 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7050 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7051 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7052 __ imull($tmp2$$Register, $tmp3$$Register); 7053 __ movsbl($dst$$Register, $src1$$Register); 7054 __ imull($dst$$Register, $tmp2$$Register); 7055 __ movsbl($dst$$Register, $dst$$Register); 7056 %} 7057 ins_pipe( pipe_slow ); 7058 %} 7059 7060 instruct rsmul4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ 7061 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 7062 match(Set dst (MulReductionVI src1 src2)); 7063 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3); 7064 format %{ "pshufd $tmp,$src2,0x1\n\t" 7065 "pmullw $tmp,$src2\n\t" 7066 "pextrw $tmp2,$tmp, 0x1\n\t" 7067 "pextrw $tmp3,$tmp, 0x0\n\t" 7068 "imull $tmp2,$tmp3 \n\t" 7069 "movswl $dst,$src1\n\t" 7070 "imull $dst,$tmp2\n\t" 7071 "movswl $dst,$dst\t! 
mul reduction4S" %} 7072 ins_encode %{ 7073 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 7074 __ pmullw($tmp$$XMMRegister, $src2$$XMMRegister); 7075 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7076 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7077 __ imull($tmp2$$Register, $tmp3$$Register); 7078 __ movswl($dst$$Register, $src1$$Register); 7079 __ imull($dst$$Register, $tmp2$$Register); 7080 __ movswl($dst$$Register, $dst$$Register); 7081 %} 7082 ins_pipe( pipe_slow ); 7083 %} 7084 7085 instruct rsmul8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 7086 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 7087 match(Set dst (MulReductionVI src1 src2)); 7088 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 7089 format %{ "pshufd $tmp,$src2,0xE\n\t" 7090 "pmullw $tmp,$src2\n\t" 7091 "pshufd $tmp1,$tmp,0x1\n\t" 7092 "pmullw $tmp,$tmp1\n\t" 7093 "pextrw $tmp2,$tmp, 0x1\n\t" 7094 "pextrw $tmp3,$tmp, 0x0\n\t" 7095 "imul $tmp2,$tmp3 \n\t" 7096 "movswl $dst,$src1\n\t" 7097 "imull $dst,$tmp2\n\t" 7098 "movswl $dst,$dst\t! 
mul reduction8S" %} 7099 ins_encode %{ 7100 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 7101 __ pmullw($tmp$$XMMRegister, $src2$$XMMRegister); 7102 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7103 __ pmullw($tmp$$XMMRegister, $tmp1$$XMMRegister); 7104 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7105 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7106 __ imull($tmp2$$Register, $tmp3$$Register); 7107 __ movswl($dst$$Register, $src1$$Register); 7108 __ imull($dst$$Register, $tmp2$$Register); 7109 __ movswl($dst$$Register, $dst$$Register); 7110 %} 7111 ins_pipe( pipe_slow ); 7112 %} 7113 7114 instruct rvmul16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 7115 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 7116 match(Set dst (MulReductionVI src1 src2)); 7117 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 7118 format %{ "vextracti128_high $tmp,$src2\n\t" 7119 "vpmullw $tmp,$tmp,$src2\n\t" 7120 "pshufd $tmp1,$tmp,0xE\n\t" 7121 "pmullw $tmp,$tmp1\n\t" 7122 "pshufd $tmp1,$tmp,0x1\n\t" 7123 "pmullw $tmp,$tmp1\n\t" 7124 "pextrw $tmp2,$tmp, 0x1\n\t" 7125 "pextrw $tmp3,$tmp, 0x0\n\t" 7126 "imul $tmp2,$tmp3 \n\t" 7127 "movswl $dst,$src1\n\t" 7128 "imull $dst,$tmp2\n\t" 7129 "movswl $dst,$dst\t! 
mul reduction16S" %} 7130 ins_encode %{ 7131 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 7132 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 1); 7133 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE); 7134 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7135 __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1); 7136 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, 0); 7137 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 7138 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 7139 __ imull($tmp2$$Register, $tmp3$$Register); 7140 __ movswl($dst$$Register, $src1$$Register); 7141 __ imull($dst$$Register, $tmp2$$Register); 7142 __ movswl($dst$$Register, $dst$$Register); 7143 %} 7144 ins_pipe( pipe_slow ); 7145 %} 7146 7147 instruct rvmul32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp1, rRegI tmp2, rRegI tmp3) %{ 7148 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 7149 match(Set dst (MulReductionVI src1 src2)); 7150 effect(TEMP dst, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); 7151 format %{ "vextracti64x4_high $tmp1,$src2\n\t" 7152 "vpmullw $tmp1,$tmp1,$src2\n\t" 7153 "vextracti128_high $tmp,$tmp1\n\t" 7154 "vpmullw $tmp,$tmp,$tmp1\n\t" 7155 "pshufd $tmp1,$tmp,0xE\n\t" 7156 "pmullw $tmp,$tmp1\n\t" 7157 "pshufd $tmp1,$tmp,0x1\n\t" 7158 "pmullw $tmp,$tmp1\n\t" 7159 "pextrw $tmp2,$tmp, 0x1\n\t" 7160 "pextrw $tmp3,$tmp, 0x0\n\t" 7161 "imul $tmp2,$tmp3 \n\t" 7162 "movswl $dst,$src1\n\t" 7163 "imull $dst,$tmp2\n\t" 7164 "movswl $dst,$dst\t! 
mul reduction32S" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp1$$XMMRegister, $src2$$XMMRegister);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp1$$XMMRegister);
    __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ pshufd($tmp1$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ imull($tmp2$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $src1$$Register);
    __ imull($dst$$Register, $tmp2$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// SSE-only 2-int multiply reduction: fold lane 1 into lane 0, then fold in src1.
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0x1\n\t"
            "pmulld $tmp2,$src2\n\t"
            "movd   $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd   $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX 2-int multiply reduction.
instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0x1\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! mul reduction2I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE-only 4-int multiply reduction: halve 4->2->1, then fold in src1.
// Fix: the predicate previously tested "UseAVX == 0" twice.
instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pmulld $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd   $tmp,$src1\n\t"
            "pmulld $tmp2,$tmp\n\t"
            "movd   $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX 4-int multiply reduction.
instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0xE\n\t"
            "vpmulld $tmp,$src2,$tmp2\n\t"
            "pshufd  $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! mul reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 8-int multiply reduction: fold high 128 bits into low, then 4->2->1.
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmulld $tmp,$tmp,$src2\n\t"
            "pshufd  $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd  $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! mul reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 16-int multiply reduction: 512->256->128, then 4->2->1.
// Fix: the second vpmulld in the format string now matches the encoding
// (it multiplies by $tmp3, not $src2).
instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmulld $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpmulld $tmp,$tmp,$tmp3\n\t"
            "pshufd  $tmp2,$tmp,0xE\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "pshufd  $tmp2,$tmp,0x1\n\t"
            "vpmulld $tmp,$tmp,$tmp2\n\t"
            "movd    $tmp2,$src1\n\t"
            "vpmulld $tmp2,$tmp,$tmp2\n\t"
            "movd    $dst,$tmp2\t! mul reduction16I" %}
  ins_encode %{
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// Long multiply reductions need vpmullq, an AVX512DQ instruction.
instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd  $tmp2,$src2,0xE\n\t"
            "vpmullq $tmp,$src2,$tmp2\n\t"
            "movdq   $tmp2,$src1\n\t"
            "vpmullq $tmp2,$tmp,$tmp2\n\t"
            "movdq   $dst,$tmp2\t! mul reduction2L" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmullq $tmp2,$tmp,$src2\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq   $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq   $dst,$tmp2\t! mul reduction4L" %}
  ins_encode %{
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulReductionVL src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpmullq $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "pshufd  $tmp,$tmp2,0xE\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq   $tmp,$src1\n\t"
            "vpmullq $tmp2,$tmp2,$tmp\n\t"
            "movdq   $dst,$tmp2\t! mul reduction8L" %}
  ins_encode %{
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($tmp$$XMMRegister, $src1$$Register);
    __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// Float multiply reductions: dst is both the scalar accumulator and result.
instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss  $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss  $dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction2F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulss  $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "mulss  $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "mulss  $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "mulss  $dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ mulss($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction4F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction8F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Double multiply reductions.
instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd  $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd  $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Fix: the second pshufd in the format string now matches the encoding
// (it shuffles $tmp2, the extracted lane, not $src2).
instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulsd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulsd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %}
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min Reduction --------------------
// Byte min reductions fold the vector down with pminsb, then min the last
// two lanes and src1 through scalar cmpl/cmovl.
// Fix: format strings rewritten to match the encodings (correct pextrb
// indices 0x3/0x2 in the second round, no stray ",0x0" operand on cmovl,
// movl uses $tmp3, final movsbl sign-extend shown, width labels corrected).
instruct rsmin8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pminsb $tmp,$src2\n\t"
            "pextrb $tmp2,$tmp,0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl   $tmp2,$tmp3\n\t"
            "cmovl  $tmp3,$tmp2\n\t"
            "cmpl   $src1,$tmp3\n\t"
            "cmovl  $tmp3,$src1\n\t"
            "movl   $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl   $tmp2,$tmp3\n\t"
            "cmovl  $tmp3,$tmp2\n\t"
            "cmpl   $tmp3,$dst\n\t"
            "cmovl  $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! min reduction8B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pminsb($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmin16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp4,$src2,0xE\n\t"
            "pminsb $tmp4,$src2\n\t"
            "pshufd $tmp,$tmp4,0x1\n\t"
            "pminsb $tmp,$tmp4\n\t"
            "pextrb $tmp2,$tmp,0x1\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x0\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl   $tmp2,$tmp3\n\t"
            "cmovl  $tmp3,$tmp2\n\t"
            "cmpl   $src1,$tmp3\n\t"
            "cmovl  $tmp3,$src1\n\t"
            "movl   $dst,$tmp3\n\t"
            "pextrb $tmp2,$tmp,0x3\n\t"
            "movsbl $tmp2,$tmp2\n\t"
            "pextrb $tmp3,$tmp,0x2\n\t"
            "movsbl $tmp3,$tmp3\n\t"
            "cmpl   $tmp2,$tmp3\n\t"
            "cmovl  $tmp3,$tmp2\n\t"
            "cmpl   $tmp3,$dst\n\t"
            "cmovl  $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! min reduction16B" %}
  ins_encode %{
    __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pminsb($tmp4$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister, 0x1);
    __ pminsb($tmp$$XMMRegister, $tmp4$$XMMRegister);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Fix: first vpminsb format line now matches the encoding ($tmp4, not $tmp).
instruct rvmin16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd  $tmp4,$src2,0xE\n\t"
            "vpminsb $tmp4,$tmp4,$src2\n\t"
            "pshufd  $tmp,$tmp4,0x1\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pextrb  $tmp2,$tmp,0x1\n\t"
            "movsbl  $tmp2,$tmp2\n\t"
            "pextrb  $tmp3,$tmp,0x0\n\t"
            "movsbl  $tmp3,$tmp3\n\t"
            "cmpl    $tmp2,$tmp3\n\t"
            "cmovl   $tmp3,$tmp2\n\t"
            "cmpl    $src1,$tmp3\n\t"
            "cmovl   $tmp3,$src1\n\t"
            "movl    $dst,$tmp3\n\t"
            "pextrb  $tmp2,$tmp,0x3\n\t"
            "movsbl  $tmp2,$tmp2\n\t"
            "pextrb  $tmp3,$tmp,0x2\n\t"
            "movsbl  $tmp3,$tmp3\n\t"
            "cmpl    $tmp2,$tmp3\n\t"
            "cmovl   $tmp3,$tmp2\n\t"
            "cmpl    $tmp3,$dst\n\t"
            "cmovl   $dst,$tmp3\n\t"
            "movsbl  $dst,$dst\t! min reduction16B" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister, 0x1);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, vector_len);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpminsb $tmp,$tmp,$src2\n\t"
            "pshufd  $tmp4,$tmp,0xE\n\t"
            "vpminsb $tmp4,$tmp4,$tmp\n\t"
            "pshufd  $tmp,$tmp4,0x1\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pextrb  $tmp2,$tmp,0x1\n\t"
            "movsbl  $tmp2,$tmp2\n\t"
            "pextrb  $tmp3,$tmp,0x0\n\t"
            "movsbl  $tmp3,$tmp3\n\t"
            "cmpl    $tmp2,$tmp3\n\t"
            "cmovl   $tmp3,$tmp2\n\t"
            "cmpl    $src1,$tmp3\n\t"
            "cmovl   $tmp3,$src1\n\t"
            "movl    $dst,$tmp3\n\t"
            "pextrb  $tmp2,$tmp,0x3\n\t"
            "movsbl  $tmp2,$tmp2\n\t"
            "pextrb  $tmp3,$tmp,0x2\n\t"
            "movsbl  $tmp3,$tmp3\n\t"
            "cmpl    $tmp2,$tmp3\n\t"
            "cmovl   $tmp3,$tmp2\n\t"
            "cmpl    $tmp3,$dst\n\t"
            "cmovl   $dst,$tmp3\n\t"
            "movsbl  $dst,$dst\t! min reduction32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister, 0x1);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "vextracti64x4_high $tmp4,$src2\n\t"
            "vpminsb $tmp4,$tmp4,$src2\n\t"
            "vextracti128_high $tmp,$tmp4\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pshufd  $tmp4,$tmp,0xE\n\t"
            "vpminsb $tmp4,$tmp4,$tmp\n\t"
            "pshufd  $tmp,$tmp4,0x1\n\t"
            "vpminsb $tmp,$tmp,$tmp4\n\t"
            "pextrb  $tmp2,$tmp,0x1\n\t"
            "movsbl  $tmp2,$tmp2\n\t"
            "pextrb  $tmp3,$tmp,0x0\n\t"
            "movsbl  $tmp3,$tmp3\n\t"
            "cmpl    $tmp2,$tmp3\n\t"
            "cmovl   $tmp3,$tmp2\n\t"
            "cmpl    $src1,$tmp3\n\t"
            "cmovl   $tmp3,$src1\n\t"
            "movl    $dst,$tmp3\n\t"
            "pextrb  $tmp2,$tmp,0x3\n\t"
            "movsbl  $tmp2,$tmp2\n\t"
            "pextrb  $tmp3,$tmp,0x2\n\t"
            "movsbl  $tmp3,$tmp3\n\t"
            "cmpl    $tmp2,$tmp3\n\t"
            "cmovl   $tmp3,$tmp2\n\t"
            "cmpl    $tmp3,$dst\n\t"
            "cmovl   $dst,$tmp3\n\t"
            "movsbl  $dst,$dst\t! min reduction64B" %}
  ins_encode %{
    __ vextracti64x4_high($tmp4$$XMMRegister, $src2$$XMMRegister);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 2);
    __ vextracti128_high($tmp$$XMMRegister, $tmp4$$XMMRegister);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 1);
    __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpminsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0);
    __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister, 0x1);
    __ vpminsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ movsbl($tmp2$$Register, $tmp2$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ movsbl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($tmp3$$Register, $dst$$Register);
    __ cmovl(Assembler::less, $dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Short min reductions.
// Fix: format string rewritten to match the encoding (pextrw not pextrb,
// no bogus third operand on movswl, "$tmp2" spelled with "$", final movl).
instruct rsmin4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp,$src2,0x1\n\t"
            "pminsw $tmp,$src2\n\t"
            "pextrw $tmp2,$tmp,0x1\n\t"
            "movswl $tmp2,$tmp2\n\t"
            "pextrw $tmp3,$tmp,0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl   $tmp2,$tmp3\n\t"
            "cmovl  $tmp3,$tmp2\n\t"
            "cmpl   $src1,$tmp3\n\t"
            "cmovl  $tmp3,$src1\n\t"
            "movl   $dst,$tmp3\t! min reduction4S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pminsw($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ movswl($tmp2$$Register, $tmp2$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp2$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp2$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Fix: format string now names $tmp4 where the encoding uses it.
instruct rsmin8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "pminsw $tmp2,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "pminsw $tmp,$tmp2\n\t"
            "pextrw $tmp4,$tmp,0x1\n\t"
            "movswl $tmp4,$tmp4\n\t"
            "pextrw $tmp3,$tmp,0x0\n\t"
            "movswl $tmp3,$tmp3\n\t"
            "cmpl   $tmp4,$tmp3\n\t"
            "cmovl  $tmp3,$tmp4\n\t"
            "cmpl   $src1,$tmp3\n\t"
            "cmovl  $tmp3,$src1\n\t"
            "movl   $dst,$tmp3\t! min reduction8S" %}
  ins_encode %{
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pminsw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
    __ pminsw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ pextrw($tmp4$$Register, $tmp$$XMMRegister, 0x1);
    __ movswl($tmp4$$Register, $tmp4$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ movswl($tmp3$$Register, $tmp3$$Register);
    __ cmpl($tmp4$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register);
    __ cmpl($src1$$Register, $tmp3$$Register);
    __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register);
    __ movl($dst$$Register, $tmp3$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct rvmin8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpminsw $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpminsw $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "vpminsw $dst,$dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "vpminsw $dst,$dst,$tmp3\n\t"
            "movswl $dst,$dst\t!
min reduction8S" %} 8031 ins_encode %{ 8032 int vector_len = 0; 8033 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8034 __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 8035 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 8036 __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8037 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 8038 __ movswl($tmp4$$Register, $tmp4$$Register); 8039 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 8040 __ movswl($tmp3$$Register, $tmp3$$Register); 8041 __ cmpl($tmp4$$Register, $tmp3$$Register); 8042 __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register); 8043 __ cmpl($src1$$Register, $tmp3$$Register); 8044 __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register); 8045 __ movl($dst$$Register, $tmp3$$Register); 8046 %} 8047 ins_pipe( pipe_slow ); 8048 %} 8049 8050 instruct rvmin16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 8051 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 8052 match(Set dst (MinReductionV src1 src2)); 8053 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8054 format %{ "vextracti128_high $tmp,$src2\n\t" 8055 "vpminsw $tmp,$tmp,$src2\n\t" 8056 "pshufd $tmp2,$tmp,0xE\n\t" 8057 "vpminsw $tmp,$tmp,$tmp2\n\t" 8058 "pshufd $tmp2,$tmp,0x1\n\t" 8059 "vpminsw $tmp,$tmp,$tmp2\n\t" 8060 "pextrw $tmp2,$tmp, 0x1\n\t" 8061 "movswl $tmp2,$tmp2\n\t" 8062 "pextrw $tmp3,$tmp, 0x0\n\t" 8063 "movswl $tmp3,$tmp3\n\t" 8064 "cmpl $tmp2$tmp3\n\t" 8065 "cmovl $tmp3,$tmp2\n\t" 8066 "cmpl $src1,$tmp3\n\t" 8067 "cmovl $tmp3,$src1\n\t" 8068 "movl $dst,$tmp3\t! 
min reduction16S" %} 8069 ins_encode %{ 8070 int vector_len = 1; 8071 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 8072 __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 8073 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 8074 __ vpminsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8075 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 8076 __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8077 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 8078 __ movswl($tmp4$$Register, $tmp4$$Register); 8079 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 8080 __ movswl($tmp3$$Register, $tmp3$$Register); 8081 __ cmpl($tmp4$$Register, $tmp3$$Register); 8082 __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register); 8083 __ cmpl($src1$$Register, $tmp3$$Register); 8084 __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register); 8085 __ movl($dst$$Register, $tmp3$$Register); 8086 %} 8087 ins_pipe( pipe_slow ); 8088 %} 8089 8090 instruct rvmin32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 8091 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 8092 match(Set dst (MinReductionV src1 src2)); 8093 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8094 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 8095 "vpminsw $tmp2,$tmp2,$src2\n\t" 8096 "vextracti128_high $tmp,$tmp2\n\t" 8097 "vpminsw $tmp,$tmp,$tmp2\n\t" 8098 "pshufd $tmp2,$tmp,0xE\n\t" 8099 "vpminsw $tmp,$tmp,$tmp2\n\t" 8100 "pshufd $tmp2,$tmp,0x1\n\t" 8101 "vpminsw $tmp,$tmp,$tmp2\n\t" 8102 "pextrw $tmp3,$tmp, 0x0\n\t" 8103 "movswl $dst,$src1\n\t" 8104 "pextrw $tmp3,$tmp, 0x0\n\t" 8105 "movswl $dst,$src1\n\t" 8106 "cmpl $tmp2$tmp3\n\t" 8107 "cmovl $tmp3,$tmp2\n\t" 8108 "cmpl $src1,$tmp3\n\t" 8109 "cmovl $tmp3,$src1\n\t" 8110 "movl $dst,$dst\t! 
min reduction32S" %} 8111 ins_encode %{ 8112 int vector_len = 2; 8113 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 8114 __ vpminsw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 8115 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 8116 __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8117 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 8118 __ vpminsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8119 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 8120 __ vpminsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8121 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 8122 __ movswl($tmp4$$Register, $tmp4$$Register); 8123 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 8124 __ movswl($tmp3$$Register, $tmp3$$Register); 8125 __ cmpl($tmp4$$Register, $tmp3$$Register); 8126 __ cmovl(Assembler::less, $tmp3$$Register, $tmp4$$Register); 8127 __ cmpl($src1$$Register, $tmp3$$Register); 8128 __ cmovl(Assembler::less, $tmp3$$Register, $src1$$Register); 8129 __ movl($dst$$Register, $tmp3$$Register); 8130 %} 8131 ins_pipe( pipe_slow ); 8132 %} 8133 8134 instruct rsmin2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 8135 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 8136 match(Set dst (MinReductionV src1 src2)); 8137 effect(TEMP tmp, TEMP tmp2); 8138 format %{ "pshufd $tmp,$src2,0x1\n\t" 8139 "pminsd $tmp,$src2\n\t" 8140 "movd $tmp2,$src1\n\t" 8141 "pminsd $tmp2,$tmp\n\t" 8142 "movd $dst,$tmp2\t! 
min reduction2I" %} 8143 ins_encode %{ 8144 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 8145 __ pminsd($tmp$$XMMRegister, $src2$$XMMRegister); 8146 __ movdl($tmp2$$XMMRegister, $src1$$Register); 8147 __ pminsd($tmp2$$XMMRegister, $tmp$$XMMRegister); 8148 __ movdl($dst$$Register, $tmp2$$XMMRegister); 8149 %} 8150 ins_pipe( pipe_slow ); 8151 %} 8152 8153 instruct rvmin2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 8154 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 8155 match(Set dst (MinReductionV src1 src2)); 8156 effect(TEMP tmp, TEMP tmp2); 8157 format %{ "pshufd $tmp,$src2,0x1\n\t" 8158 "vpminsd $tmp2,$tmp,$src2\n\t" 8159 "movd $tmp,$src1\n\t" 8160 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8161 "movd $dst,$tmp2\t! min reduction2I" %} 8162 ins_encode %{ 8163 int vector_len = 0; 8164 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 8165 __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 8166 __ movdl($tmp2$$XMMRegister, $src1$$Register); 8167 __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8168 __ movdl($dst$$Register, $tmp2$$XMMRegister); 8169 %} 8170 ins_pipe( pipe_slow ); 8171 %} 8172 8173 instruct rsmin4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 8174 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 8175 match(Set dst (MinReductionV src1 src2)); 8176 effect(TEMP tmp, TEMP tmp2); 8177 format %{ "pshufd $tmp,$src2,0xE\n\t" 8178 "pminsd $tmp,$src2\n\t" 8179 "pshufd $tmp2,$tmp,0x1\n\t" 8180 "pminsd $tmp2,$tmp\n\t" 8181 "movd $tmp,$src1\n\t" 8182 "pminsd $tmp2,$tmp\n\t" 8183 "movd $dst,$tmp2\t! 
min reduction4I" %} 8184 ins_encode %{ 8185 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8186 __ pminsd($tmp$$XMMRegister, $src2$$XMMRegister); 8187 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 8188 __ pminsd($tmp2$$XMMRegister,$tmp$$XMMRegister); 8189 __ movdl($tmp$$XMMRegister, $src1$$Register); 8190 __ pminsd($tmp2$$XMMRegister,$tmp$$XMMRegister); 8191 __ movdl($dst$$Register, $tmp2$$XMMRegister); 8192 %} 8193 ins_pipe( pipe_slow ); 8194 %} 8195 8196 instruct rvmin4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 8197 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 8198 match(Set dst (MinReductionV src1 src2)); 8199 effect(TEMP tmp, TEMP tmp2); 8200 format %{ "pshufd $tmp,$src2,0xE\n\t" 8201 "vpminsd $tmp2,$tmp,$src2\n\t" 8202 "pshufd $tmp,$tmp2,0x1\n\t" 8203 "vpminsd $tmp2,$tmp2,$tmp\n\t" 8204 "movd $tmp,$src1\n\t" 8205 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8206 "movd $dst,$tmp2\t! min reduction4I" %} 8207 ins_encode %{ 8208 int vector_len = 0; 8209 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8210 __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 8211 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 8212 __ vpminsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8213 __ movdl($tmp$$XMMRegister, $src1$$Register); 8214 __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8215 __ movdl($dst$$Register, $tmp2$$XMMRegister); 8216 %} 8217 ins_pipe( pipe_slow ); 8218 %} 8219 8220 instruct rvmin4I_reduction_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 8221 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 8222 match(Set dst (MinReductionV src1 src2)); 8223 effect(TEMP tmp, TEMP tmp2); 8224 format %{ "pshufd $tmp,$src2,0xE\n\t" 8225 "vpminsd $tmp2,$tmp,$src2\n\t" 8226 "pshufd $tmp,$tmp2,0x1\n\t" 8227 "vpminsd $tmp2,$tmp2,$tmp\n\t" 
8228 "movd $tmp,$src1\n\t" 8229 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8230 "movd $dst,$tmp2\t! min reduction4I" %} 8231 ins_encode %{ 8232 int vector_len = 0; 8233 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8234 __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 8235 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 8236 __ vpminsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8237 __ movdl($tmp$$XMMRegister, $src1$$Register); 8238 __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8239 __ movdl($dst$$Register, $tmp2$$XMMRegister); 8240 %} 8241 ins_pipe( pipe_slow ); 8242 %} 8243 8244 instruct rvmin8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 8245 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 8246 match(Set dst (MinReductionV src1 src2)); 8247 effect(TEMP tmp, TEMP tmp2); 8248 format %{ "vextracti128_high $tmp,$src2\n\t" 8249 "vpminsd $tmp,$tmp,$src2\n\t" 8250 "pshufd $tmp2,$tmp,0xE\n\t" 8251 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8252 "pshufd $tmp,$tmp2,0x1\n\t" 8253 "vpminsd $tmp2,$tmp2,$tmp\n\t" 8254 "movd $tmp,$src1\n\t" 8255 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8256 "movd $dst,$tmp2\t! 
min reduction8I" %} 8257 ins_encode %{ 8258 int vector_len = 1; 8259 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 8260 __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 8261 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 8262 __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8263 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 8264 __ vpminsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8265 __ movdl($tmp$$XMMRegister, $src1$$Register); 8266 __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8267 __ movdl($dst$$Register, $tmp2$$XMMRegister); 8268 %} 8269 ins_pipe( pipe_slow ); 8270 %} 8271 8272 instruct rvmin8I_reduction_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 8273 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 8274 match(Set dst (MinReductionV src1 src2)); 8275 effect(TEMP tmp, TEMP tmp2); 8276 format %{ "vextracti128_high $tmp,$src2\n\t" 8277 "vpminsd $tmp,$tmp,$src2\n\t" 8278 "pshufd $tmp2,$tmp,0xE\n\t" 8279 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8280 "pshufd $tmp,$tmp2,0x1\n\t" 8281 "vpminsd $tmp2,$tmp2,$tmp\n\t" 8282 "movd $tmp,$src1\n\t" 8283 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8284 "movd $dst,$tmp2\t! 
min reduction8I" %} 8285 ins_encode %{ 8286 int vector_len = 1; 8287 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 8288 __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 8289 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 8290 __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8291 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 8292 __ vpminsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8293 __ movdl($tmp$$XMMRegister, $src1$$Register); 8294 __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8295 __ movdl($dst$$Register, $tmp2$$XMMRegister); 8296 %} 8297 ins_pipe( pipe_slow ); 8298 %} 8299 8300 instruct rvmin16I_reduction_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 8301 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 8302 match(Set dst (MinReductionV src1 src2)); 8303 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 8304 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 8305 "vpminsd $tmp3,$tmp3,$src2\n\t" 8306 "vextracti128_high $tmp,$tmp3\n\t" 8307 "vpminsd $tmp,$tmp,$tmp3\n\t" 8308 "pshufd $tmp2,$tmp,0xE\n\t" 8309 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8310 "pshufd $tmp,$tmp2,0x1\n\t" 8311 "vpminsd $tmp2,$tmp2,$tmp\n\t" 8312 "movd $tmp,$src1\n\t" 8313 "vpminsd $tmp2,$tmp,$tmp2\n\t" 8314 "movd $dst,$tmp2\t! 
min reduction16I" %} 8315 ins_encode %{ 8316 int vector_len = 2; 8317 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 8318 __ vpminsd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); 8319 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 8320 __ vpminsd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); 8321 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 8322 __ vpminsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8323 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 8324 __ vpminsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8325 __ movdl($tmp$$XMMRegister, $src1$$Register); 8326 __ vpminsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8327 __ movdl($dst$$Register, $tmp2$$XMMRegister); 8328 %} 8329 ins_pipe( pipe_slow ); 8330 %} 8331 8332 // Long Min Reduction 8333 instruct rsmin1L_reduction_reg(rRegL dst, rRegL src1, vecD src2, rxmm0 tmp, regF tmp2) %{ 8334 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 8335 match(Set dst (MinReductionV src1 src2)); 8336 effect(TEMP tmp, TEMP tmp2); 8337 format %{ "movdq $tmp,$src1\n\t" 8338 "movdq $tmp2,$src1\n\t" 8339 "pcmpgtq $tmp,$src2\n\t" 8340 "blendvpd $tmp2,$src2\n\t" 8341 "movdq $dst,$tmp2\t! 
min reduction1L" %} 8342 ins_encode %{ 8343 __ movdq($tmp$$XMMRegister,$src1$$Register); 8344 __ movdq($tmp2$$XMMRegister,$src1$$Register); 8345 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 8346 __ blendvpd($tmp2$$XMMRegister,$src2$$XMMRegister); 8347 __ movdq($dst$$Register, $tmp2$$XMMRegister); 8348 %} 8349 ins_pipe( pipe_slow ); 8350 %} 8351 8352 instruct rsmin2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, rxmm0 xmm_0, regF tmp2, regF tmp3) %{ 8353 predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 8354 match(Set dst (MinReductionV src1 src2)); 8355 effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3); 8356 format %{ "pshufd $tmp3,$src2,0xE\n\t" 8357 "movdqu $xmm_0,$src2\n\t" 8358 "movdqu $tmp2,$src2\n\t" 8359 "pcmpgtq $xmm_0,$tmp3\n\t" 8360 "blendvpd $tmp2,$tmp3\n\t" 8361 "movdqu $xmm_0,$tmp2\n\t" 8362 "movdq $tmp3,$src1\n\t" 8363 "pcmpgtq $xmm_0,$tmp3\n\t" 8364 "blendvpd $tmp2,$tmp3\n\t" 8365 "movq $dst,$tmp2\t! min reduction2L" %} 8366 ins_encode %{ 8367 __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 0xE); 8368 __ movdqu($xmm_0$$XMMRegister, $src2$$XMMRegister); 8369 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 8370 __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister); 8371 __ blendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister); 8372 __ movdqu($xmm_0$$XMMRegister, $tmp2$$XMMRegister); 8373 __ movdq($tmp3$$XMMRegister, $src1$$Register); 8374 __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister); 8375 __ blendvpd($tmp2$$XMMRegister,$tmp3$$XMMRegister); 8376 __ movdq($dst$$Register, $tmp2$$XMMRegister); 8377 %} 8378 ins_pipe( pipe_slow ); 8379 %} 8380 8381 instruct rvmin2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2, regF tmp3) %{ 8382 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 8383 match(Set dst (MinReductionV src1 src2)); 8384 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 8385 format %{ "pshufd $tmp2,$src2,0xE\n\t" 8386 "vpcmpgtq 
$tmp,$tmp2,$src2\n\t" 8387 "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t" 8388 "movq $tmp,$src1\n\t" 8389 "vpcmpgtq $tmp3,$tmp2,$tmp\n\t" 8390 "blendvpd $tmp2,$tmp2,$src1,$tmp3\n\t" 8391 "movq $dst,$tmp2\t! min reduction2L" %} 8392 ins_encode %{ 8393 int vector_len = 0; 8394 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 8395 __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 8396 __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8397 __ movdq($tmp$$XMMRegister,$src1$$Register); 8398 __ vpcmpgtq($tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8399 __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister,$src1$$XMMRegister,$tmp3$$XMMRegister, vector_len); 8400 __ movdq($dst$$Register, $tmp2$$XMMRegister); 8401 %} 8402 ins_pipe( pipe_slow ); 8403 %} 8404 8405 instruct rvmin4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{ 8406 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 8407 match(Set dst (MinReductionV src1 src2)); 8408 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 8409 format %{ "vextracti128_high $tmp2,$src2\n\t" 8410 "vpcmpgtq $tmp,$tmp2,$src2\n\t" 8411 "vblendvpd $tmp2,$tmp2,$src2,$tmp\n\t" 8412 "vpshufd $tmp3, $tmp2,0x1\n\t" 8413 "vpcmpgtq $tmp, $tmp3,$tmp\n\t2" 8414 "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t" 8415 "movq $tmp2,$src1\n\t" 8416 "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" 8417 "blendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" 8418 "movq $dst,$tmp2\t! 
min reduction2L" %} 8419 ins_encode %{ 8420 int vector_len = 1; 8421 __ vextracti128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 8422 __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 8423 __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8424 __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8425 __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8426 __ vblendvpd($tmp3$$XMMRegister,$tmp3$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 8427 __ movdq($tmp$$XMMRegister,$src1$$Register); 8428 __ vpcmpgtq($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len); 8429 __ vblendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister,$tmp$$XMMRegister,$tmp2$$XMMRegister, vector_len); 8430 __ movdq($dst$$Register, $tmp2$$XMMRegister); 8431 %} 8432 ins_pipe( pipe_slow ); 8433 %} 8434 8435 instruct rvmin8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 8436 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 8437 match(Set dst (MinReductionV src1 src2)); 8438 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 8439 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 8440 "vpcmpgtq $tmp,$tmp3,$src2\n\t" 8441 "vblendvpd $tmp3,$tmp3,$src2,$tmp\n\t" 8442 "vextracti128_high $tmp2,$tmp3\n\t" 8443 "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" 8444 "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" 8445 "vpshufd $tmp3,$tmp2,0x1\n\t" 8446 "vpcmpgtq $tmp,$tmp3,$tmp2\n\t" 8447 "vblendvpd $tmp3,$tmp3,$tmp2,$tmp\n\t" 8448 "movq $tmp2,$src1\n\t" 8449 "vpcmpgtq $tmp,$tmp2,$tmp3\n\t" 8450 "vblendvpd $tmp2,$tmp2,$tmp3,$tmp\n\t" 8451 "movq $dst,$tmp2\t! 
min reduction4I" %} 8452 ins_encode %{ 8453 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 8454 __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 8455 __ vblendvpd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 1); 8456 __ vextracti128_high($tmp2$$XMMRegister, $tmp3$$XMMRegister); 8457 __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 1); 8458 __ vblendvpd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, 1); 8459 __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8460 __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, 1); 8461 __ vblendvpd($tmp3$$XMMRegister,$tmp3$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, 1); 8462 __ movdq($tmp2$$XMMRegister, $src1$$Register); 8463 __ vpcmpgtq($tmp$$XMMRegister,$tmp2$$XMMRegister, $tmp3$$XMMRegister, 1); 8464 __ vblendvpd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, 1); 8465 __ movdq($dst$$Register, $tmp2$$XMMRegister); 8466 %} 8467 ins_pipe( pipe_slow ); 8468 %} 8469 8470 // Float Min Reduction 8471 instruct rsmin2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 8472 predicate(UseSSE > 0 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 8473 match(Set dst (MinReductionV dst src2)); 8474 effect(TEMP dst, TEMP tmp); 8475 format %{ "minps $dst, $src2\n\t" 8476 "pshufd $tmp,$src2,0x1\n\t" 8477 "minps $dst,$tmp\t! 
min reduction2F" %} 8478 ins_encode %{ 8479 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 8480 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 8481 __ minps($dst$$XMMRegister, $tmp$$XMMRegister); 8482 %} 8483 ins_pipe( pipe_slow ); 8484 %} 8485 8486 instruct rvmin2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 8487 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 8488 match(Set dst (MinReductionV dst src2)); 8489 effect(TEMP dst, TEMP tmp); 8490 format %{ "vminps $dst,$dst,$src2\n\t" 8491 "pshufd $tmp,$src2,0x1\n\t" 8492 "vminps $dst,$dst,$tmp\t! min reduction2F" %} 8493 ins_encode %{ 8494 int vector_len = 0; 8495 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8496 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 8497 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8498 %} 8499 ins_pipe( pipe_slow ); 8500 %} 8501 8502 instruct rsmin4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 8503 predicate(UseSSE > 0 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 8504 match(Set dst (MinReductionV dst src2)); 8505 effect(TEMP tmp, TEMP dst); 8506 format %{ "minps $dst,$src2\n\t" 8507 "pshufd $tmp,$src2,0x1\n\t" 8508 "minps $dst,tmp\n\t" 8509 "pshufd $tmp,$src2,0x2\n\t" 8510 "minps $dst,tmp\n\t" 8511 "pshufd $tmp,$src2,0x3\n\t" 8512 "minps $dst,$tmp\t! 
min reduction4F" %} 8513 ins_encode %{ 8514 int vector_len = 0; 8515 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 8516 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 8517 __ minps($dst$$XMMRegister, $tmp$$XMMRegister); 8518 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x2); 8519 __ minps($dst$$XMMRegister, $tmp$$XMMRegister); 8520 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x3); 8521 __ minps($dst$$XMMRegister, $tmp$$XMMRegister); 8522 %} 8523 ins_pipe( pipe_slow ); 8524 %} 8525 8526 instruct rvmin4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 8527 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 8528 match(Set dst (MinReductionV dst src2)); 8529 effect(TEMP tmp, TEMP dst); 8530 format %{ "vminps $dst,$dst,$src2\n\t" 8531 "pshufd $tmp,$src2,0x1\n\t" 8532 "vminps $dst,$dst,tmp\n\t" 8533 "pshufd $tmp,$src2,0x2\n\t" 8534 "vminps $dst,$dst,tmp\n\t" 8535 "pshufd $tmp,$src2,0x3\n\t" 8536 "vminps $dst,$dst,$tmp\t! min reduction4F" %} 8537 ins_encode %{ 8538 int vector_len = 0; 8539 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8540 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 8541 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8542 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x2); 8543 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8544 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x3); 8545 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8546 %} 8547 ins_pipe( pipe_slow ); 8548 %} 8549 8550 instruct rvmin8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 8551 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 8552 match(Set dst (MinReductionV dst src2)); 8553 effect(TEMP tmp, TEMP dst, TEMP tmp2); 8554 format %{ "vminps $dst,$dst,$src2\n\t" 8555 "pshufd $tmp,$src2,0x01\n\t" 8556 "vminps 
$dst,$dst,$tmp\n\t" 8557 "pshufd $tmp,$src2,0x02\n\t" 8558 "vminps $dst,$dst,$tmp\n\t" 8559 "pshufd $tmp,$src2,0x03\n\t" 8560 "vminps $dst,$dst,$tmp\n\t" 8561 "vextractf128_high $tmp2,$src2\n\t" 8562 "vminps $dst,$dst,$tmp2\n\t" 8563 "pshufd $tmp,$tmp2,0x01\n\t" 8564 "vminps $dst,$dst,$tmp\n\t" 8565 "pshufd $tmp,$tmp2,0x02\n\t" 8566 "vminps $dst,$dst,$tmp\n\t" 8567 "pshufd $tmp,$tmp2,0x03\n\t" 8568 "vminps $dst,$dst,$tmp\t! sub reduction8F" %} 8569 ins_encode %{ 8570 int vector_len = 1; 8571 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8572 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 8573 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8574 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 8575 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8576 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 8577 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8578 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 8579 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8580 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 8581 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8582 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 8583 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8584 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 8585 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8586 %} 8587 ins_pipe( pipe_slow ); 8588 %} 8589 8590 instruct rvmin16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 8591 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 8592 match(Set dst (MinReductionV dst src2)); 8593 effect(TEMP tmp, TEMP dst, TEMP tmp2); 8594 format %{ "vminps $dst,$dst,$src2\n\t" 8595 "pshufd 
$tmp,$src2,0x01\n\t" 8596 "vminps $dst,$dst,$tmp\n\t" 8597 "pshufd $tmp,$src2,0x02\n\t" 8598 "vminps $dst,$dst,$tmp\n\t" 8599 "pshufd $tmp,$src2,0x03\n\t" 8600 "vminps $dst,$dst,$tmp\n\t" 8601 "vextractf32x4 $tmp2,$src2,0x1\n\t" 8602 "vminps $dst,$dst,$tmp2\n\t" 8603 "pshufd $tmp,$tmp2,0x01\n\t" 8604 "vminps $dst,$dst,$tmp\n\t" 8605 "pshufd $tmp,$tmp2,0x02\n\t" 8606 "vminps $dst,$dst,$tmp\n\t" 8607 "pshufd $tmp,$tmp2,0x03\n\t" 8608 "vminps $dst,$dst,$tmp\n\t" 8609 "vextractf32x4 $tmp2,$src2,0x2\n\t" 8610 "vminps $dst,$dst,$tmp2\n\t" 8611 "pshufd $tmp,$tmp2,0x01\n\t" 8612 "vminps $dst,$dst,$tmp\n\t" 8613 "pshufd $tmp,$tmp2,0x02\n\t" 8614 "vminps $dst,$dst,$tmp\n\t" 8615 "pshufd $tmp,$tmp2,0x03\n\t" 8616 "vminps $dst,$dst,$tmp\n\t" 8617 "vextractf32x4 $tmp2,$src2,0x3\n\t" 8618 "vminps $dst,$dst,$tmp2\n\t" 8619 "pshufd $tmp,$tmp2,0x01\n\t" 8620 "vminps $dst,$dst,$tmp\n\t" 8621 "pshufd $tmp,$tmp2,0x02\n\t" 8622 "vminps $dst,$dst,$tmp\n\t" 8623 "pshufd $tmp,$tmp2,0x03\n\t" 8624 "vminps $dst,$dst,$tmp\t! 
sub reduction16F" %} 8625 ins_encode %{ 8626 int vector_len = 2; 8627 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8628 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 8629 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8630 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 8631 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8632 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 8633 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8634 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 8635 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8636 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 8637 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8638 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 8639 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8640 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 8641 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8642 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 8643 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8644 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 8645 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8646 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 8647 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8648 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 8649 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8650 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 8651 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8652 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 8653 __ 
vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8654 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 8655 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8656 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 8657 __ vminps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8658 %} 8659 ins_pipe( pipe_slow ); 8660 %} 8661 8662 instruct rsmin2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 8663 predicate(UseSSE >= 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 8664 match(Set dst (MinReductionV dst src2)); 8665 effect(TEMP tmp, TEMP dst); 8666 format %{ "minpd $dst,$src2\n\t" 8667 "pshufd $tmp,$src2,0xE\n\t" 8668 "minpd $dst,$tmp\t! min reduction2D" %} 8669 ins_encode %{ 8670 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 8671 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8672 __ minpd($dst$$XMMRegister, $tmp$$XMMRegister); 8673 %} 8674 ins_pipe( pipe_slow ); 8675 %} 8676 8677 instruct rvmin2D_reduction_reg(regD dst, vecX src2, regD tmp) %{ 8678 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 8679 match(Set dst (MinReductionV dst src2)); 8680 effect(TEMP tmp, TEMP dst); 8681 format %{ "vminpd $dst,$dst,$src2\n\t" 8682 "pshufd $tmp,$src2,0xE\n\t" 8683 "vminpd $dst,$dst,$tmp\t! 
min reduction2D" %} 8684 ins_encode %{ 8685 int vector_len = 0; 8686 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8687 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8688 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8689 %} 8690 ins_pipe( pipe_slow ); 8691 %} 8692 8693 instruct rvmin4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 8694 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 8695 match(Set dst (MinReductionV dst src2)); 8696 effect(TEMP tmp, TEMP dst, TEMP tmp2); 8697 format %{ "vminpd $dst,$dst,$src2\n\t" 8698 "pshufd $tmp,$src2,0xE\n\t" 8699 "vminpd $dst,$dst,$tmp\n\t" 8700 "vextractf32x4 $tmp2,$src2,0x1\n\t" 8701 "vminpd $dst,$dst,$tmp2\n\t" 8702 "pshufd $tmp,$tmp2,0xE\n\t" 8703 "vminpd $dst,$dst,$tmp\t! min reduction4D" %} 8704 ins_encode %{ 8705 int vector_len = 1; 8706 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8707 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8708 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8709 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 8710 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8711 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8712 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8713 %} 8714 ins_pipe( pipe_slow ); 8715 %} 8716 8717 instruct rvmin8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 8718 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 8719 match(Set dst (MinReductionV dst src2)); 8720 effect(TEMP tmp, TEMP dst, TEMP tmp2); 8721 format %{ "vminpd $dst,$dst,$src2\n\t" 8722 "pshufd $tmp,$src2,0xE\n\t" 8723 "vminpd $dst,$dst,$tmp\n\t" 8724 "vextractf32x4 $tmp2,$src2,0x1\n\t" 8725 "vminpd $dst,$dst,$tmp2\n\t" 8726 "pshufd $tmp,$tmp2,0xE\n\t" 
8727 "vminpd $dst,$dst,$tmp\n\t" 8728 "vextractf32x4 $tmp2,$src2,0x2\n\t" 8729 "vminpd $dst,$dst,$tmp2\n\t" 8730 "pshufd $tmp,$tmp2,0xE\n\t" 8731 "vminpd $dst,$dst,$tmp\n\t" 8732 "vextractf32x4 $tmp2,$src2,0x3\n\t" 8733 "vminpd $dst,$dst,$tmp2\n\t" 8734 "pshufd $tmp,$tmp2,0xE\n\t" 8735 "vminpd $dst,$dst,$tmp\t! min reduction8D" %} 8736 ins_encode %{ 8737 int vector_len = 2; 8738 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len); 8739 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 8740 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8741 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 8742 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8743 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8744 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8745 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 8746 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8747 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8748 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8749 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 8750 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len); 8751 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 8752 __ vminpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 8753 %} 8754 ins_pipe( pipe_slow ); 8755 %} 8756 8757 // ------- Max Reduction ------------ 8758 8759 instruct rsmax8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8760 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8761 match(Set dst (MaxReductionV src1 src2)); 8762 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8763 format %{ "pshufd $tmp,$src2,0x1\n\t" 8764 "pminsb 
$tmp,$src2\n\t" 8765 "pextrb $tmp2,$tmp, 0x1\n\t" 8766 "movsbl $tmp2,$tmp2\n\t" 8767 "pextrb $tmp3,$tmp,0x0\n\t" 8768 "movsbl $tmp3,$tmp3\n\t" 8769 "cmpl $tmp2,$tmp3\n\t" 8770 "cmovl $tmp3,$tmp2\n\t" 8771 "cmpl $src1,$tmp3\n\t" 8772 "cmovl $tmp3,$src1, 0x0\n\t" 8773 "movl $dst,$tmp2\n\t" 8774 "pextrb $tmp2,$tmp\n\t" 8775 "movsbl $tmp2,$tmp2\n\t" 8776 "pextrb $tmp3,$tmp\n\t" 8777 "movsbl $tmp3,$tmp3\n\t" 8778 "cmpl $tmp2,$tmp3\n\t" 8779 "cmovl $tmp3,$tmp2\n\t" 8780 "cmpl $tmp3,$dst\n\t" 8781 "cmovl $dst,$tmp3\t! min reduction4S" %} 8782 ins_encode %{ 8783 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); 8784 __ pmaxsb($tmp$$XMMRegister, $src2$$XMMRegister); 8785 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 8786 __ movsbl($tmp2$$Register, $tmp2$$Register); 8787 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 8788 __ movsbl($tmp3$$Register, $tmp3$$Register); 8789 __ cmpl($tmp2$$Register, $tmp3$$Register); 8790 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8791 __ cmpl($src1$$Register, $tmp3$$Register); 8792 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 8793 __ movl($dst$$Register, $tmp3$$Register); 8794 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 8795 __ movsbl($tmp2$$Register, $tmp2$$Register); 8796 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 8797 __ movsbl($tmp3$$Register, $tmp3$$Register); 8798 __ cmpl($tmp2$$Register, $tmp3$$Register); 8799 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8800 __ cmpl($tmp3$$Register, $dst$$Register); 8801 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 8802 __ movsbl($dst$$Register, $dst$$Register); 8803 %} 8804 ins_pipe( pipe_slow ); 8805 %} 8806 8807 instruct rsmax16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8808 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8809 match(Set dst (MaxReductionV src1 src2)); 8810 effect(TEMP dst, TEMP tmp, 
TEMP tmp2, TEMP tmp3, TEMP tmp4); 8811 format %{ "pshufd $tmp4,$src2,0xE\n\t" 8812 "pmaxsb $tmp4,$src2\n\t" 8813 "pshufd $tmp,$tmp4,0x1\n\t" 8814 "pmaxsb $tmp,$tmp4\n\t" 8815 "pextrb $tmp2,$tmp, 0x1\n\t" 8816 "movsbl $tmp2,$tmp2\n\t" 8817 "pextrb $tmp3,$tmp,0x0\n\t" 8818 "movsbl $tmp3,$tmp3\n\t" 8819 "cmpl $tmp2,$tmp3\n\t" 8820 "cmovl $tmp3,$tmp2\n\t" 8821 "cmpl $src1,$tmp3\n\t" 8822 "cmovl $tmp3,$src1, 0x0\n\t" 8823 "movl $dst,$tmp2\n\t" 8824 "pextrb $tmp2,$tmp\n\t" 8825 "movsbl $tmp2,$tmp2\n\t" 8826 "pextrb $tmp3,$tmp\n\t" 8827 "movsbl $tmp3,$tmp3\n\t" 8828 "cmpl $tmp2,$tmp3\n\t" 8829 "cmovl $tmp3,$tmp2\n\t" 8830 "cmpl $tmp3,$dst\n\t" 8831 "cmovl $dst,$tmp3\t! max reduction4S" %} 8832 ins_encode %{ 8833 __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE); 8834 __ pmaxsb($tmp4$$XMMRegister, $src2$$XMMRegister); 8835 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 8836 __ pmaxsb($tmp$$XMMRegister, $tmp4$$XMMRegister); 8837 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 8838 __ movsbl($tmp2$$Register, $tmp2$$Register); 8839 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 8840 __ movsbl($tmp3$$Register, $tmp3$$Register); 8841 __ cmpl($tmp2$$Register, $tmp3$$Register); 8842 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8843 __ cmpl($src1$$Register, $tmp3$$Register); 8844 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 8845 __ movl($dst$$Register, $tmp3$$Register); 8846 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 8847 __ movsbl($tmp2$$Register, $tmp2$$Register); 8848 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 8849 __ movsbl($tmp3$$Register, $tmp3$$Register); 8850 __ cmpl($tmp2$$Register, $tmp3$$Register); 8851 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8852 __ cmpl($tmp3$$Register, $dst$$Register); 8853 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 8854 __ movsbl($dst$$Register, $dst$$Register); 8855 %} 8856 ins_pipe( pipe_slow ); 8857 %} 8858 8859 instruct 
rvmax16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8860 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8861 match(Set dst (MaxReductionV src1 src2)); 8862 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8863 format %{ "pshufd $tmp4,$src2,0xE\n\t" 8864 "vpmaxsb $tmp,$tmp4,$src2\n\t" 8865 "pshufd $tmp,$tmp4,0x1\n\t" 8866 "vpmaxsb $tmp,$tmp,$tmp4\n\t" 8867 "pextrb $tmp2,$tmp, 0x1\n\t" 8868 "movsbl $tmp2,$tmp2\n\t" 8869 "pextrb $tmp3,$tmp,0x0\n\t" 8870 "movsbl $tmp3,$tmp3\n\t" 8871 "cmpl $tmp2,$tmp3\n\t" 8872 "cmovl $tmp3,$tmp2\n\t" 8873 "cmpl $src1,$tmp3\n\t" 8874 "cmovl $tmp3,$src1, 0x0\n\t" 8875 "movl $dst,$tmp2\n\t" 8876 "pextrb $tmp2,$tmp\n\t" 8877 "movsbl $tmp2,$tmp2\n\t" 8878 "pextrb $tmp3,$tmp\n\t" 8879 "movsbl $tmp3,$tmp3\n\t" 8880 "cmpl $tmp2,$tmp3\n\t" 8881 "cmovl $tmp3,$tmp2\n\t" 8882 "cmpl $tmp3,$dst\n\t" 8883 "cmovl $dst,$tmp3\t! max reduction4S" %} 8884 ins_encode %{ 8885 int vector_len = 0; 8886 __ pshufd($tmp4$$XMMRegister, $src2$$XMMRegister, 0xE); 8887 __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 0); 8888 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 8889 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 8890 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 8891 __ movsbl($tmp2$$Register, $tmp2$$Register); 8892 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 8893 __ movsbl($tmp3$$Register, $tmp3$$Register); 8894 __ cmpl($tmp2$$Register, $tmp3$$Register); 8895 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8896 __ cmpl($src1$$Register, $tmp3$$Register); 8897 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 8898 __ movl($dst$$Register, $tmp3$$Register); 8899 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 8900 __ movsbl($tmp2$$Register, $tmp2$$Register); 8901 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 8902 __ movsbl($tmp3$$Register, 
$tmp3$$Register); 8903 __ cmpl($tmp2$$Register, $tmp3$$Register); 8904 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8905 __ cmpl($tmp3$$Register, $dst$$Register); 8906 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 8907 __ movsbl($dst$$Register, $dst$$Register); 8908 %} 8909 ins_pipe( pipe_slow ); 8910 %} 8911 8912 instruct rvmax32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8913 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8914 match(Set dst (MaxReductionV src1 src2)); 8915 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8916 format %{ "vextracti128_high $tmp,$src2\n\t" 8917 "vpmaxsb $tmp,$tmp,$src2\n\t" 8918 "pshufd $tmp4,$tmp,0xE\n\t" 8919 "vpmaxsb $tmp4,$tmp4,$tmp\n\t" 8920 "pshufd $tmp,$tmp4,0x1\n\t" 8921 "vpmaxsb $tmp,$tmp,$tmp4\n\t" 8922 "pextrb $tmp2,$tmp, 0x1\n\t" 8923 "movsbl $tmp2,$tmp2\n\t" 8924 "pextrb $tmp3,$tmp,0x0\n\t" 8925 "movsbl $tmp3,$tmp3\n\t" 8926 "cmpl $tmp2,$tmp3\n\t" 8927 "cmovl $tmp3,$tmp2\n\t" 8928 "cmpl $src1,$tmp3\n\t" 8929 "cmovl $tmp3,$src1, 0x0\n\t" 8930 "movl $dst,$tmp2\n\t" 8931 "pextrb $tmp2,$tmp\n\t" 8932 "movsbl $tmp2,$tmp2\n\t" 8933 "pextrb $tmp3,$tmp\n\t" 8934 "movsbl $tmp3,$tmp3\n\t" 8935 "cmpl $tmp2,$tmp3\n\t" 8936 "cmovl $tmp3,$tmp2\n\t" 8937 "cmpl $tmp3,$dst\n\t" 8938 "cmovl $dst,$tmp3\t! 
min reduction4S" %} 8939 ins_encode %{ 8940 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 8941 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 8942 __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE); 8943 __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0); 8944 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 8945 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 8946 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 8947 __ movsbl($tmp2$$Register, $tmp2$$Register); 8948 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 8949 __ movsbl($tmp3$$Register, $tmp3$$Register); 8950 __ cmpl($tmp2$$Register, $tmp3$$Register); 8951 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8952 __ cmpl($src1$$Register, $tmp3$$Register); 8953 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 8954 __ movl($dst$$Register, $tmp3$$Register); 8955 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 8956 __ movsbl($tmp2$$Register, $tmp2$$Register); 8957 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 8958 __ movsbl($tmp3$$Register, $tmp3$$Register); 8959 __ cmpl($tmp2$$Register, $tmp3$$Register); 8960 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 8961 __ cmpl($tmp3$$Register, $dst$$Register); 8962 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 8963 __ movsbl($dst$$Register, $dst$$Register); 8964 %} 8965 ins_pipe( pipe_slow ); 8966 %} 8967 8968 instruct rvmax64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, rRegI tmp2, rRegI tmp3, regF tmp4) %{ 8969 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 8970 match(Set dst (MaxReductionV src1 src2)); 8971 effect(TEMP dst, TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 8972 format %{ "vextracti64x4_high $tmp4,$src2\n\t" 8973 "vpmaxsb $tmp4,$tmp4,$src2\n\t" 8974 "vextracti128_high $tmp,$tmp4\n\t" 8975 "vpmaxsb $tmp,$tmp,$tmp4\n\t" 8976 "pshufd 
$tmp4,$tmp,0xE\n\t" 8977 "vpmaxsb $tmp,$tmp4,$tmp\n\t" 8978 "pshufd $tmp4,$src2,0xE\n\t" 8979 "vpmaxsb $tmp,$tmp4,$src2\n\t" 8980 "pshufd $tmp,$tmp4,0x1\n\t" 8981 "vpmaxsb $tmp,$tmp,$tmp4\n\t" 8982 "pextrb $tmp2,$tmp, 0x1\n\t" 8983 "movsbl $tmp2,$tmp2\n\t" 8984 "pextrb $tmp3,$tmp,0x0\n\t" 8985 "movsbl $tmp3,$tmp3\n\t" 8986 "cmpl $tmp2,$tmp3\n\t" 8987 "cmovl $tmp3,$tmp2\n\t" 8988 "cmpl $src1,$tmp3\n\t" 8989 "cmovl $tmp3,$src1, 0x0\n\t" 8990 "movl $dst,$tmp2\n\t" 8991 "pextrb $tmp2,$tmp\n\t" 8992 "movsbl $tmp2,$tmp2\n\t" 8993 "pextrb $tmp3,$tmp\n\t" 8994 "movsbl $tmp3,$tmp3\n\t" 8995 "cmpl $tmp2,$tmp3\n\t" 8996 "cmovl $tmp3,$tmp2\n\t" 8997 "cmpl $tmp3,$dst\n\t" 8998 "cmovl $dst,$tmp3\t! max reduction32B" %} 8999 ins_encode %{ 9000 __ vextracti64x4_high($tmp4$$XMMRegister, $src2$$XMMRegister); 9001 __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $src2$$XMMRegister, 1); 9002 __ vextracti128_high($tmp$$XMMRegister, $tmp4$$XMMRegister); 9003 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 9004 __ pshufd($tmp4$$XMMRegister, $tmp$$XMMRegister, 0xE); 9005 __ vpmaxsb($tmp4$$XMMRegister, $tmp4$$XMMRegister, $tmp$$XMMRegister, 0); 9006 __ pshufd($tmp$$XMMRegister, $tmp4$$XMMRegister,0x1); 9007 __ vpmaxsb($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp4$$XMMRegister, 0); 9008 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x1); 9009 __ movsbl($tmp2$$Register, $tmp2$$Register); 9010 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x0); 9011 __ movsbl($tmp3$$Register, $tmp3$$Register); 9012 __ cmpl($tmp2$$Register, $tmp3$$Register); 9013 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 9014 __ cmpl($src1$$Register, $tmp3$$Register); 9015 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9016 __ movl($dst$$Register, $tmp3$$Register); 9017 __ pextrb($tmp2$$Register, $tmp$$XMMRegister,0x3); 9018 __ movsbl($tmp2$$Register, $tmp2$$Register); 9019 __ pextrb($tmp3$$Register, $tmp$$XMMRegister,0x2); 9020 __ movsbl($tmp3$$Register, 
$tmp3$$Register); 9021 __ cmpl($tmp2$$Register, $tmp3$$Register); 9022 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 9023 __ cmpl($tmp3$$Register, $dst$$Register); 9024 __ cmovl(Assembler::greater, $dst$$Register, $tmp3$$Register); 9025 __ movsbl($dst$$Register, $dst$$Register); 9026 %} 9027 ins_pipe( pipe_slow ); 9028 %} 9029 9030 instruct rsmax4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ 9031 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9032 match(Set dst (MaxReductionV src1 src2)); 9033 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 9034 format %{ "pshufd $tmp,$src2,0x1\n\t" 9035 "pminsw $tmp,$src2\n\t" 9036 "movzwl $dst,$src1\n\t" 9037 "pextrw $tmp2,$tmp, 0x0\n\t" 9038 "pminsw $dst,$tmp2\n\t" 9039 "pminsw $dst,$tmp2\n\t" 9040 "movswl $dst,$dst\t! min reduction4S" %} 9041 ins_encode %{ 9042 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); 9043 __ pmaxsw($tmp$$XMMRegister, $src2$$XMMRegister); 9044 __ pextrw($tmp2$$Register, $tmp$$XMMRegister,0x1); 9045 __ movswl($tmp2$$Register, $tmp2$$Register); 9046 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9047 __ movswl($tmp3$$Register, $tmp3$$Register); 9048 __ cmpl($tmp2$$Register, $tmp3$$Register); 9049 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 9050 __ cmpl($src1$$Register, $tmp3$$Register); 9051 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9052 __ movl($dst$$Register, $tmp3$$Register); 9053 %} 9054 ins_pipe( pipe_slow ); 9055 %} 9056 9057 instruct rvmax4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2, rRegI tmp3) %{ 9058 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9059 match(Set dst (MaxReductionV src1 src2)); 9060 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 9061 format %{ "pshufd $tmp,$src2,0x1\n\t" 9062 "pminsw $tmp,$src2\n\t" 9063 "movzwl $dst,$src1\n\t" 9064 "pextrw $tmp2,$tmp, 0x0\n\t" 9065 
"pminsw $dst,$tmp2\n\t" 9066 "pminsw $dst,$tmp2\n\t" 9067 "movswl $dst,$dst\t! min reduction4S" %} 9068 ins_encode %{ 9069 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister,0x1); 9070 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 9071 __ pextrw($tmp2$$Register, $tmp$$XMMRegister,0x1); 9072 __ movswl($tmp2$$Register, $tmp2$$Register); 9073 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9074 __ movswl($tmp3$$Register, $tmp3$$Register); 9075 __ cmpl($tmp2$$Register, $tmp3$$Register); 9076 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp2$$Register); 9077 __ cmpl($src1$$Register, $tmp3$$Register); 9078 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9079 __ movl($dst$$Register, $tmp3$$Register); 9080 %} 9081 ins_pipe( pipe_slow ); 9082 %} 9083 9084 instruct rsmax8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 9085 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9086 match(Set dst (MaxReductionV src1 src2)); 9087 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 9088 format %{ "pshufd $tmp2,$src2,0xE\n\t" 9089 "pmaxsw $tmp2,$src2\n\t" 9090 "pshufd $tmp,$tmp2,0x1\n\t" 9091 "pmaxsw $tmp,$tmp2\n\t" 9092 "pextrw $tmp2,$tmp\n\t" 9093 "movswl $tmp2,$tmp2\n\t" 9094 "pextrw $tmp3,$tmp, 0x0\n\t" 9095 "movswl $tmp3,$tmp3\n\t" 9096 "cmpl $tmp2,$tmp3\n\t" 9097 "cmovl $tmp3,$tmp2\n\t" 9098 "cmpl $src1,$tmp3\n\t" 9099 "cmovl $tmp3,$src1\n\t" 9100 "movl $dst,$tmp3\t! 
max reduction8S" %} 9101 ins_encode %{ 9102 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister,0xE); 9103 __ pmaxsw($tmp2$$XMMRegister, $src2$$XMMRegister); 9104 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1); 9105 __ pmaxsw($tmp$$XMMRegister, $tmp2$$XMMRegister); 9106 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 9107 __ movswl($tmp4$$Register, $tmp4$$Register); 9108 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9109 __ movswl($tmp3$$Register, $tmp3$$Register); 9110 __ cmpl($tmp4$$Register, $tmp3$$Register); 9111 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); 9112 __ cmpl($src1$$Register, $tmp3$$Register); 9113 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9114 __ movl($dst$$Register, $tmp3$$Register); 9115 %} 9116 ins_pipe( pipe_slow ); 9117 %} 9118 9119 instruct rvmax8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 9120 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9121 match(Set dst (MaxReductionV src1 src2)); 9122 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 9123 format %{ "pshufd $tmp,$src2,0xE\n\t" 9124 "vpmaxsw $tmp,$tmp,$src2\n\t" 9125 "pshufd $tmp2,$tmp,0x1\n\t" 9126 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9127 "movzwl $dst,$src1\n\t" 9128 "pextrw $tmp3,$tmp, 0x0\n\t" 9129 "vpmaxsw $dst,$dst,$tmp3\n\t" 9130 "pextrw $tmp3,$tmp, 0x1\n\t" 9131 "vpmaxsw $dst,$dst,$tmp3\n\t" 9132 "movswl $dst,$dst\t! 
max reduction8S" %} 9133 ins_encode %{ 9134 int vector_len = 0; 9135 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 9136 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9137 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 9138 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9139 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 9140 __ movswl($tmp4$$Register, $tmp4$$Register); 9141 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9142 __ movswl($tmp3$$Register, $tmp3$$Register); 9143 __ cmpl($tmp4$$Register, $tmp3$$Register); 9144 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); 9145 __ cmpl($src1$$Register, $tmp3$$Register); 9146 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9147 __ movl($dst$$Register, $tmp3$$Register); 9148 %} 9149 ins_pipe( pipe_slow ); 9150 %} 9151 9152 instruct rvmax16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 9153 predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9154 match(Set dst (MaxReductionV src1 src2)); 9155 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 9156 format %{ "vextracti128_high $tmp,$src2\n\t" 9157 "vpmaxsw $tmp,$tmp,$src2\n\t" 9158 "pshufd $tmp2,$tmp,0xE\n\t" 9159 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9160 "pshufd $tmp2,$tmp,0x1\n\t" 9161 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9162 "pextrw $tmp2,$tmp, 0x1\n\t" 9163 "movswl $tmp2,$tmp2\n\t" 9164 "pextrw $tmp3,$tmp, 0x0\n\t" 9165 "movswl $tmp3,$tmp3\n\t" 9166 "cmpl $tmp2$tmp3\n\t" 9167 "cmovl $tmp3,$tmp2\n\t" 9168 "cmpl $src1,$tmp3\n\t" 9169 "cmovl $tmp3,$src1\n\t" 9170 "movl $dst,$tmp3\t! 
max reduction16S" %} 9171 ins_encode %{ 9172 int vector_len = 1; 9173 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 9174 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9175 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 9176 __ vpmaxsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9177 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 9178 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9179 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 9180 __ movswl($tmp4$$Register, $tmp4$$Register); 9181 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9182 __ movswl($tmp3$$Register, $tmp3$$Register); 9183 __ cmpl($tmp4$$Register, $tmp3$$Register); 9184 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); 9185 __ cmpl($src1$$Register, $tmp3$$Register); 9186 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9187 __ movl($dst$$Register, $tmp3$$Register); 9188 %} 9189 ins_pipe( pipe_slow ); 9190 %} 9191 9192 instruct rvmax32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3, rRegI tmp4) %{ 9193 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 9194 match(Set dst (MaxReductionV src1 src2)); 9195 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP tmp4); 9196 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 9197 "vpmaxsw $tmp2,$tmp2,$src2\n\t" 9198 "vextracti128_high $tmp,$tmp2\n\t" 9199 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9200 "pshufd $tmp2,$tmp,0xE\n\t" 9201 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9202 "pshufd $tmp2,$tmp,0x1\n\t" 9203 "vpmaxsw $tmp,$tmp,$tmp2\n\t" 9204 "pextrw $tmp3,$tmp, 0x0\n\t" 9205 "movswl $dst,$src1\n\t" 9206 "pextrw $tmp3,$tmp, 0x0\n\t" 9207 "movswl $dst,$src1\n\t" 9208 "cmpl $tmp2$tmp3\n\t" 9209 "cmovl $tmp3,$tmp2\n\t" 9210 "cmpl $src1,$tmp3\n\t" 9211 "cmovl $tmp3,$src1\n\t" 9212 "movl $dst,$dst\t! 
max reduction32S" %} 9213 ins_encode %{ 9214 int vector_len = 2; 9215 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 9216 __ vpmaxsw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 9217 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 9218 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9219 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 9220 __ vpmaxsw($tmp$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9221 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister,0x1); 9222 __ vpmaxsw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9223 __ pextrw($tmp4$$Register, $tmp$$XMMRegister,0x1); 9224 __ movswl($tmp4$$Register, $tmp4$$Register); 9225 __ pextrw($tmp3$$Register, $tmp$$XMMRegister,0x0); 9226 __ movswl($tmp3$$Register, $tmp3$$Register); 9227 __ cmpl($tmp4$$Register, $tmp3$$Register); 9228 __ cmovl(Assembler::greater, $tmp3$$Register, $tmp4$$Register); 9229 __ cmpl($src1$$Register, $tmp3$$Register); 9230 __ cmovl(Assembler::greater, $tmp3$$Register, $src1$$Register); 9231 __ movl($dst$$Register, $tmp3$$Register); 9232 %} 9233 ins_pipe( pipe_slow ); 9234 %} 9235 9236 instruct rsmax2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 9237 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9238 match(Set dst (MaxReductionV src1 src2)); 9239 effect(TEMP tmp, TEMP tmp2); 9240 format %{ "pshufd $tmp,$src2,0x1\n\t" 9241 "pmaxsd $tmp,$src2\n\t" 9242 "movd $tmp2,$src1\n\t" 9243 "pmaxsd $tmp2,$tmp\n\t" 9244 "movd $dst,$tmp2\t! 
max reduction2I" %} 9245 ins_encode %{ 9246 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9247 __ pmaxsd($tmp$$XMMRegister, $src2$$XMMRegister); 9248 __ movdl($tmp2$$XMMRegister, $src1$$Register); 9249 __ pmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister); 9250 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9251 %} 9252 ins_pipe( pipe_slow ); 9253 %} 9254 9255 instruct rvmax2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 9256 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9257 match(Set dst (MaxReductionV src1 src2)); 9258 effect(TEMP tmp, TEMP tmp2); 9259 format %{ "pshufd $tmp,$src2,0x1\n\t" 9260 "vpmaxsd $tmp2,$tmp,$src2\n\t" 9261 "movd $tmp,$src1\n\t" 9262 "vpmaxsd $tmp2,$tmp,$tmp2\n\t" 9263 "movd $dst,$tmp2\t! max reduction2I" %} 9264 ins_encode %{ 9265 int vector_len = 0; 9266 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 9267 __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 9268 __ movdl($tmp$$XMMRegister, $src1$$Register); 9269 __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 9270 __ movdl($dst$$Register, $tmp2$$XMMRegister); 9271 %} 9272 ins_pipe( pipe_slow ); 9273 %} 9274 9275 instruct rsmax4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 9276 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 9277 match(Set dst (MaxReductionV src1 src2)); 9278 effect(TEMP tmp, TEMP tmp2); 9279 format %{ "pshufd $tmp,$src2,0xE\n\t" 9280 "pmaxsd $tmp,$src2\n\t" 9281 "pshufd $tmp2,$tmp,0x1\n\t" 9282 "pmaxsd $tmp2,$tmp\n\t" 9283 "movd $tmp,$src1\n\t" 9284 "pmaxsd $tmp2,$tmp\n\t" 9285 "movd $dst,$tmp2\t! 
max reduction4I" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pmaxsd($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ pmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 4 ints, AVX (VEX-encoded vpmaxsd): folds the four lanes
// of $src2 pairwise (pshufd + vpmaxsd), folds in the scalar accumulator
// $src1 via movdl, and moves the final max into the GP register $dst.
instruct rvmax4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpmaxsd $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpmaxsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpmaxsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! max reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Identical lowering to rvmax4I_reduction_reg above, but selected on
// UseAVX > 2 (EVEX-capable CPUs).
instruct rvmax4I_reduction_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "vpmaxsd $tmp2,$tmp,$src2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpmaxsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpmaxsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! max reduction4I" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 8 ints (256-bit, AVX2): reduce the high 128-bit half onto
// the low half first, then fold the remaining 4 lanes and the scalar $src1.
// NOTE(review): vector_len = 1 is also passed to the vpmaxsd ops that only
// consume 128-bit inputs after the extract — presumably harmless since only
// the low lane reaches $dst; confirm against the assembler contract.
instruct rvmax8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmaxsd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmaxsd $tmp2,$tmp,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpmaxsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpmaxsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! max reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Identical lowering to rvmax8I_reduction_reg above, but selected on
// UseAVX > 2 (EVEX-capable CPUs).
instruct rvmax8I_reduction_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpmaxsd $tmp,$tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmaxsd $tmp2,$tmp,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpmaxsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpmaxsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! max reduction8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 16 ints (512-bit, AVX-512): reduce 512 -> 256 -> 128 bits
// via the extract-high helpers, then fold the last 4 lanes and scalar $src1.
instruct rvmax16I_reduction_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpmaxsd $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpmaxsd $tmp,$tmp,$tmp3\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpmaxsd $tmp2,$tmp,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x1\n\t"
            "vpmaxsd $tmp2,$tmp2,$tmp\n\t"
            "movd $tmp,$src1\n\t"
            "vpmaxsd $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t!
max reduction16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpmaxsd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
    __ vpmaxsd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpmaxsd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister,0x1);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdl($tmp$$XMMRegister, $src1$$Register);
    __ vpmaxsd($tmp2$$XMMRegister,$tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long Max Reduction

// Signed max of the single long in $src2 with scalar $src1, SSE4.2 style:
// pcmpgtq builds the selector mask in xmm0 and blendvpd (implicit xmm0 mask)
// picks the larger value.
// Format below rewritten to list exactly the six instructions the encode
// emits (the old format omitted "movdq $tmp3,$src2" and named the wrong
// source for the final movdq).
// NOTE(review): $src2 is a vector (vecD/XMM) operand, yet the encode uses
// $src2$$Register in a movdq — confirm this GPR<->XMM move is intended
// rather than an XMM-to-XMM copy (e.g. movdqu).
instruct rsmax1L_reduction_reg(rRegL dst, rRegL src1, vecD src2, rxmm0 xmm_0, regF tmp2, regF tmp3) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3);
  format %{ "movdq $xmm_0,$src1\n\t"
            "movdq $tmp2,$src1\n\t"
            "movdq $tmp3,$src2\n\t"
            "pcmpgtq $xmm_0,$tmp3\n\t"
            "blendvpd $tmp3,$tmp2\n\t"
            "movdq $dst,$tmp3\t! max reduction1L" %}
  ins_encode %{
    __ movdq($xmm_0$$XMMRegister,$src1$$Register);
    __ movdq($tmp2$$XMMRegister,$src1$$Register);
    __ movdq($tmp3$$XMMRegister,$src2$$Register);
    __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister);
    __ blendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister);
    __ movdq($dst$$Register, $tmp3$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Signed max of 2 longs plus scalar $src1, pure SSE4.2 (no AVX): compare
// lanes with pcmpgtq into the implicit blendvpd mask register xmm0.
instruct rsmax2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, rxmm0 xmm_0, regF tmp2, regF tmp3) %{
  predicate(UseSSE > 3 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP xmm_0, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp3,$src2,0xE\n\t"
            "movdqu $xmm_0,$src2\n\t"
            "pcmpgtq $xmm_0,$tmp3\n\t"
            "blendvpd $tmp3,$src2\n\t"
            "movdqu $xmm_0,$tmp3\n\t"
            "movdq $tmp2,$src1\n\t"
            "pcmpgtq $xmm_0,$tmp2\n\t"
            "blendvpd $tmp2,$tmp3\n\t"
            "movdq $dst,$tmp2\t! max reduction2L" %}
  ins_encode %{
    __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ movdqu($xmm_0$$XMMRegister, $src2$$XMMRegister);
    __ pcmpgtq($xmm_0$$XMMRegister, $tmp3$$XMMRegister);
    __ blendvpd($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ movdqu($xmm_0$$XMMRegister, $tmp3$$XMMRegister);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ pcmpgtq($xmm_0$$XMMRegister, $tmp2$$XMMRegister);
    __ blendvpd($tmp2$$XMMRegister,$tmp3$$XMMRegister);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Signed max of 2 longs plus scalar $src1, AVX (vpcmpgtq/vblendvpd with an
// explicit mask register). Format rewritten so operand order matches the
// encode (the old format swapped the two vblendvpd sources).
instruct rvmax2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "pshufd $tmp2,$src2,0xE\n\t"
            "vpcmpgtq $tmp,$tmp2,$src2\n\t"
            "vblendvpd $tmp2,$src2,$tmp2,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpcmpgtq $tmp3,$tmp2,$tmp\n\t"
            "vblendvpd $tmp2,$tmp,$tmp2,$tmp3\n\t"
            "movdq $dst,$tmp2\t! max reduction2L" %}
  ins_encode %{
    int vector_len = 0;
    __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister,$src1$$Register);
    __ vpcmpgtq($tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Signed max of 4 longs plus scalar $src1 (256-bit, AVX2). Format rewritten
// to mirror the encode: the old format had a stray "2" after a \n\t, showed
// pshufd 0x1 where the encode uses 0xE, listed vblendvpd/vpcmpgtq operands
// in the wrong order, and was mislabeled "max reduction2L".
instruct rvmax4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti128_high $tmp2,$src2\n\t"
            "vpcmpgtq $tmp,$tmp2,$src2\n\t"
            "vblendvpd $tmp2,$src2,$tmp2,$tmp\n\t"
            "pshufd $tmp3,$tmp2,0xE\n\t"
            "vpcmpgtq $tmp,$tmp3,$tmp2\n\t"
            "vblendvpd $tmp3,$tmp2,$tmp3,$tmp\n\t"
            "movdq $tmp,$src1\n\t"
            "vpcmpgtq $tmp2,$tmp3,$tmp\n\t"
            "vblendvpd $tmp2,$tmp,$tmp3,$tmp2\n\t"
            "movdq $dst,$tmp2\t! max reduction4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vblendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp$$XMMRegister,$src1$$Register);
    __ vpcmpgtq($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp$$XMMRegister,$tmp3$$XMMRegister,$tmp2$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Signed max of 8 longs plus scalar $src1 (512-bit, AVX-512): 512 -> 256 ->
// 128 -> 64-bit fold. Format rewritten to match the encode's operand order
// and relabeled "max reduction8L" (was mislabeled "max reduction4I").
// NOTE(review): the encode uses vector_len = 1 throughout even though the
// instruction reduces a vecZ — presumably fine because every op after the
// first extract is at most 256-bit; confirm.
instruct rvmax8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpcmpgtq $tmp,$tmp3,$src2\n\t"
            "vblendvpd $tmp3,$src2,$tmp3,$tmp\n\t"
            "vextracti128_high $tmp2,$tmp3\n\t"
            "vpcmpgtq $tmp,$tmp2,$tmp3\n\t"
            "vblendvpd $tmp2,$tmp3,$tmp2,$tmp\n\t"
            "pshufd $tmp3,$tmp2,0xE\n\t"
            "vpcmpgtq $tmp,$tmp3,$tmp2\n\t"
            "vblendvpd $tmp3,$tmp2,$tmp3,$tmp\n\t"
            "movdq $tmp2,$src1\n\t"
            "vpcmpgtq $tmp,$tmp2,$tmp3\n\t"
            "vblendvpd $tmp2,$tmp3,$tmp2,$tmp\n\t"
            "movdq $dst,$tmp2\t! max reduction8L" %}
  ins_encode %{
    int vector_len = 1;
    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vblendvpd($tmp3$$XMMRegister, $src2$$XMMRegister, $tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextracti128_high($tmp2$$XMMRegister, $tmp3$$XMMRegister);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp3$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vpcmpgtq($tmp$$XMMRegister, $tmp3$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vblendvpd($tmp3$$XMMRegister,$tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($tmp2$$XMMRegister, $src1$$Register);
    __ vpcmpgtq($tmp$$XMMRegister,$tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len);
    __ vblendvpd($tmp2$$XMMRegister,$tmp3$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ movdq($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float max Reduction
instruct rsmax2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseSSE > 0 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "maxps $dst, $src2\n\t"
            "pshufd $tmp,$src2,0x1\n\t"
            "maxps $dst,$tmp\t!
max reduction2F" %}
  ins_encode %{
    __ maxps($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ maxps($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 2 floats with the running accumulator in $dst, AVX form.
instruct rvmax2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "vmaxps $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x1\n\t"
            "vmaxps $dst,$dst,$tmp\t! max reduction2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 4 floats into accumulator $dst, pure SSE: rotate each
// lane of $src2 into lane 0 with pshufd and fold with maxps.
// (Format fixed: two operands were written "tmp" without the '$' prefix.)
instruct rsmax4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseSSE > 0 && UseAVX == 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "maxps $dst,$src2\n\t"
            "pshufd $tmp,$src2,0x1\n\t"
            "maxps $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x2\n\t"
            "maxps $dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x3\n\t"
            "maxps $dst,$tmp\t! max reduction4F" %}
  ins_encode %{
    __ maxps($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ maxps($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ maxps($dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ maxps($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 4 floats into accumulator $dst, AVX form of the above.
// (Format fixed: two operands were written "tmp" without the '$' prefix.)
instruct rvmax4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmaxps $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x1\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x2\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x3\n\t"
            "vmaxps $dst,$dst,$tmp\t! max reduction4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 8 floats (256-bit): fold the low 128-bit lanes, extract
// the high half and fold its lanes the same way.
// (Format label fixed: was "! sub reduction8F" on a max instruction.)
instruct rvmax8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmaxps $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "vextractf128_high $tmp2,$src2\n\t"
            "vmaxps $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmaxps $dst,$dst,$tmp\t! max reduction8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 16 floats (512-bit, AVX-512): fold the low 128 bits, then
// each of the three remaining 128-bit lanes extracted with vextractf32x4.
// (Format label fixed: was "! sub reduction16F" on a max instruction.)
instruct rvmax16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmaxps $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmaxps $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmaxps $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmaxps $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmaxps $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmaxps $dst,$dst,$tmp\t! max reduction16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmaxps($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): maxpd is an SSE2 instruction, but this predicate admits
// UseSSE == 1 — confirm whether "UseSSE > 1" was intended.
instruct rsmax2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "maxpd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "maxpd $dst,$tmp\t!
max reduction2D" %}
  ins_encode %{
    __ maxpd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ maxpd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 2 doubles with the running accumulator in $dst, AVX form:
// fold both lanes of $src2, then fold the high lane rotated down by pshufd.
instruct rvmax2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "vmaxpd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmaxpd $dst,$dst,$tmp\t! max reduction2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 4 doubles (256-bit): fold the low 128-bit pair, then the
// high 128-bit lane extracted into $tmp2.
// NOTE(review): vextractf32x4 is used here under a UseAVX > 0 predicate —
// confirm the assembler accepts it below AVX-512 (the 8F variant above uses
// vextractf128_high instead).
instruct rvmax4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmaxpd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmaxpd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmaxpd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmaxpd $dst,$dst,$tmp\t! max reduction4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Max reduction of 8 doubles (512-bit, AVX-512): fold the low 128-bit pair,
// then each of the three remaining 128-bit lanes (vextractf32x4 0x1..0x3).
instruct rvmax8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxReductionV dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmaxpd $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "vmaxpd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmaxpd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmaxpd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmaxpd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmaxpd $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmaxpd $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0xE\n\t"
            "vmaxpd $dst,$dst,$tmp\t! max reduction8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
    __ vmaxpd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of 8 bytes plus scalar $src1 (SSE4.1 pextrb): fold the two
// dwords of $src2 with pand, then AND the four remaining bytes into $dst
// one at a time and sign-extend the byte result.
instruct rsand8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{
            "pshufd $tmp,$src2,0x1\n\t"
            "pand $tmp,$src2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp2,$tmp, 0x0\n\t"
            "andl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x1\n\t"
            "andl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x2\n\t"
            "andl $dst,$tmp2\n\t"
            "pextrb $tmp2,$tmp, 0x3\n\t"
            "andl $dst,$tmp2\n\t"
            "movsbl $dst,$dst\t!
and reduction8B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ andl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ andl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2);
    __ andl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ andl($dst$$Register, $tmp2$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of 16 bytes plus scalar $src1: fold 16 -> 8 -> 4 bytes with
// pshufd/pand, then AND the last four bytes in via pextrb.
// (Format fixed: SSE pand is two-operand; the old format showed
// "pand $tmp,$tmp,$tmp2".)
instruct rsand16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pand $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pand $tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "andl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "andl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "andl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "andl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! and reduction16B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3);
    __ andl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of 32 bytes plus scalar $src1 (256-bit, AVX): fold the high
// 128 bits onto the low half, then 16 -> 8 -> 4 bytes, then pextrb/andl.
// (Format fixed: the encode emits vpshufd, not pshufd.)
instruct rvand32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpand $tmp,$tmp,$src2\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "andl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "andl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "andl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "andl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! and reduction32B" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2);
    __ andl($dst$$Register, $tmp3$$Register);
    __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3);
    __ andl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of 64 bytes plus scalar $src1 (512-bit, AVX-512): fold
// 512 -> 256 -> 128 -> 32 bits, then AND the four bytes of the low dword
// into $dst via movdl + shrl instead of pextrb.
instruct rvand64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpand $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "andl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "andl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "andl $dst,$tmp3\n\t"
            "shrl $tmp3,0x8\n\t"
            "andl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! and reduction64B" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzbl($dst$$Register, $src1$$Register);
    __ movdl($tmp3$$Register, $tmp$$XMMRegister);
    __ andl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andl($dst$$Register, $tmp3$$Register);
    __ shrl($tmp3$$Register, 8);
    __ andl($dst$$Register, $tmp3$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of 4 shorts plus scalar $src1: fold the two dwords with
// pand, then AND the two remaining words in via pextrw and sign-extend.
instruct rsand4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{
            "pshufd $tmp,$src2,0x1\n\t"
            "pand $tmp,$src2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp2,$tmp, 0x0\n\t"
            "andw $dst,$tmp2\n\t"
            "pextrw $tmp2,$tmp, 0x1\n\t"
            "andw $dst,$tmp2\n\t"
            "movswl $dst,$dst\t! and reduction4S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp2$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp2$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of 8 shorts plus scalar $src1: fold 8 -> 4 -> 2 words with
// pshufd/pand, then pextrw/andw the last two words.
// (Format fixed: SSE pand is two-operand; the old format showed
// "pand $tmp,$tmp,$tmp2".)
instruct rsand8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pand $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pand $tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "andw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! and reduction8S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ pand($tmp$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// AND reduction of 16 shorts plus scalar $src1 (256-bit, AVX): fold the high
// 128 bits down, then 8 -> 4 -> 2 words, then pextrw/andw.
// (Format fixed: the encode emits vpshufd, not pshufd.)
instruct rvand16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti128_high $tmp,$src2\n\t"
            "vpand $tmp,$tmp,$src2\n\t"
            "vpshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "andw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! and reduction16S" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ andw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ andw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): instruct below continues past this chunk; left as-is.
// "shrl $tmp3,0x16" in its format looks suspicious for a 16-bit-word shift
// (0x16 == 22) — confirm against the encode in the following section.
instruct rvand32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpand $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpand $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "andw $dst,$tmp3\n\t"
            "shrl $tmp3,0x16\n\t"
            "andw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t!
and reduction32S" %} 10124 ins_encode %{ 10125 int vector_len = 0; 10126 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 10127 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 10128 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 10129 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10130 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 10131 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10132 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 10133 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10134 __ movzwl($dst$$Register, $src1$$Register); 10135 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 10136 __ andw($dst$$Register, $tmp3$$Register); 10137 __ shrl($tmp3$$Register, 16); 10138 __ andw($dst$$Register, $tmp3$$Register); 10139 __ movswl($dst$$Register, $dst$$Register); 10140 %} 10141 ins_pipe( pipe_slow ); 10142 %} 10143 10144 instruct rsand2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 10145 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 10146 match(Set dst (AndReductionV src1 src2)); 10147 effect(TEMP tmp, TEMP tmp2); 10148 format %{ "pshufd $tmp2,$src2,0x1\n\t" 10149 "pand $tmp2,$src2\n\t" 10150 "movd $tmp,$src1\n\t" 10151 "pand $tmp2,$tmp\n\t" 10152 "movd $dst,$tmp2\t! 
and reduction2I" %} 10153 ins_encode %{ 10154 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 10155 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 10156 __ movdl($tmp$$XMMRegister, $src1$$Register); 10157 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 10158 __ movdl($dst$$Register, $tmp2$$XMMRegister); 10159 %} 10160 ins_pipe( pipe_slow ); 10161 %} 10162 10163 instruct rsand4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 10164 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 10165 match(Set dst (AndReductionV src1 src2)); 10166 effect(TEMP tmp, TEMP tmp2); 10167 format %{ "pshufd $tmp2,$src2,0xE\n\t" 10168 "pand $tmp2,$src2\n\t" 10169 "pshufd $tmp,$tmp2,0x1\n\t" 10170 "pand $tmp2,$tmp\n\t" 10171 "movd $tmp,$src1\n\t" 10172 "pand $tmp2,$tmp\n\t" 10173 "movd $dst,$tmp2\t! and reduction4I" %} 10174 ins_encode %{ 10175 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 10176 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 10177 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 10178 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 10179 __ movdl($tmp$$XMMRegister, $src1$$Register); 10180 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 10181 __ movdl($dst$$Register, $tmp2$$XMMRegister); 10182 %} 10183 ins_pipe( pipe_slow ); 10184 %} 10185 10186 instruct rvand8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 10187 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 10188 match(Set dst (AndReductionV src1 src2)); 10189 effect(TEMP tmp, TEMP tmp2); 10190 format %{ "vextracti128_high $tmp,$src2\n\t" 10191 "vpand $tmp,$tmp,$src2\n\t" 10192 "vpshufd $tmp2,$tmp,0xE\n\t" 10193 "vpand $tmp,$tmp,$tmp2\n\t" 10194 "vpshufd $tmp2,$tmp,0x1\n\t" 10195 "vpand $tmp,$tmp,$tmp2\n\t" 10196 "movd $tmp2,$src1\n\t" 10197 "vpand $tmp2,$tmp,$tmp2\n\t" 10198 "movd $dst,$tmp2\t! 
and reduction8I" %} 10199 ins_encode %{ 10200 int vector_len = 0; 10201 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 10202 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 10203 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 10204 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10205 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 10206 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10207 __ movdl($tmp2$$XMMRegister, $src1$$Register); 10208 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10209 __ movdl($dst$$Register, $tmp2$$XMMRegister); 10210 %} 10211 ins_pipe( pipe_slow ); 10212 %} 10213 10214 instruct rvand16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 10215 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 10216 match(Set dst (AndReductionV src1 src2)); 10217 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 10218 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 10219 "vpand $tmp3,$tmp3,$src2\n\t" 10220 "vextracti128_high $tmp,$tmp3\n\t" 10221 "vpand $tmp,$tmp,$src2\n\t" 10222 "vpshufd $tmp2,$tmp,0xE\n\t" 10223 "vpand $tmp,$tmp,$tmp2\n\t" 10224 "vpshufd $tmp2,$tmp,0x1\n\t" 10225 "vpand $tmp,$tmp,$tmp2\n\t" 10226 "movd $tmp2,$src1\n\t" 10227 "vpand $tmp2,$tmp,$tmp2\n\t" 10228 "movd $dst,$tmp2\t! 
and reduction16I" %} 10229 ins_encode %{ 10230 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 10231 __ vpand($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 10232 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 10233 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 10234 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, 0); 10235 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 10236 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, 0); 10237 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 10238 __ movdl($tmp2$$XMMRegister, $src1$$Register); 10239 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 10240 __ movdl($dst$$Register, $tmp2$$XMMRegister); 10241 %} 10242 ins_pipe( pipe_slow ); 10243 %} 10244 10245 #ifdef _LP64 10246 instruct rsand2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 10247 predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10248 match(Set dst (AndReductionV src1 src2)); 10249 effect(TEMP tmp, TEMP tmp2); 10250 format %{ "pshufd $tmp2,$src2,0xE\n\t" 10251 "pand $tmp2,$src2\n\t" 10252 "movdq $tmp,$src1\n\t" 10253 "pand $tmp2,$tmp\n\t" 10254 "movq $dst,$tmp2\t! 
and reduction2L" %} 10255 ins_encode %{ 10256 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 10257 __ pand($tmp2$$XMMRegister, $src2$$XMMRegister); 10258 __ movdq($tmp$$XMMRegister, $src1$$Register); 10259 __ pand($tmp2$$XMMRegister, $tmp$$XMMRegister); 10260 __ movq($dst$$Register, $tmp2$$XMMRegister); 10261 %} 10262 ins_pipe( pipe_slow ); 10263 %} 10264 10265 instruct rvand4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 10266 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10267 match(Set dst (AndReductionV src1 src2)); 10268 effect(TEMP tmp, TEMP tmp2); 10269 format %{ "vextracti128_high $tmp,$src2\n\t" 10270 "vpand $tmp2,$tmp,$src2\n\t" 10271 "vpshufd $tmp,$tmp2,0xE\n\t" 10272 "vpand $tmp2,$tmp2,$tmp\n\t" 10273 "movq $tmp,$src1\n\t" 10274 "vpand $tmp2,$tmp2,$tmp\n\t" 10275 "movq $dst,$tmp2\t! and reduction4L" %} 10276 ins_encode %{ 10277 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 10278 __ vpand($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 10279 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); 10280 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 10281 __ movq($tmp$$XMMRegister, $src1$$Register); 10282 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 10283 __ movq($dst$$Register, $tmp2$$XMMRegister); 10284 %} 10285 ins_pipe( pipe_slow ); 10286 %} 10287 10288 instruct rvand8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 10289 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10290 match(Set dst (AndReductionV src1 src2)); 10291 effect(TEMP tmp, TEMP tmp2); 10292 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 10293 "vpandq $tmp2,$tmp2,$src2\n\t" 10294 "vextracti128_high $tmp,$tmp2\n\t" 10295 "vpandq $tmp2,$tmp2,$tmp\n\t" 10296 "vpshufd $tmp,$tmp2,0xE\n\t" 10297 "vpandq $tmp2,$tmp2,$tmp\n\t" 10298 "movdq $tmp,$src1\n\t" 
10299 "vpandq $tmp2,$tmp2,$tmp\n\t" 10300 "movdq $dst,$tmp2\t! and reduction8L" %} 10301 ins_encode %{ 10302 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 10303 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 10304 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 10305 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 10306 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, 0); 10307 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 10308 __ movdq($tmp$$XMMRegister, $src1$$Register); 10309 __ vpandq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 10310 __ movdq($dst$$Register, $tmp2$$XMMRegister); 10311 %} 10312 ins_pipe( pipe_slow ); 10313 %} 10314 #endif 10315 10316 instruct rsor8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 10317 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10318 match(Set dst (OrReductionV src1 src2)); 10319 effect(TEMP tmp, TEMP tmp2, TEMP dst); 10320 format %{ 10321 "pshufd $tmp,$src2,0x1\n\t" 10322 "por $tmp,$src2\n\t" 10323 "movzbl $dst,$src1\n\t" 10324 "pextrb $tmp2,$tmp, 0x0\n\t" 10325 "orl $dst,$tmp2\n\t" 10326 "pextrb $tmp2,$tmp, 0x1\n\t" 10327 "orl $dst,$tmp2\n\t" 10328 "pextrb $tmp2,$tmp, 0x2\n\t" 10329 "orl $dst,$tmp2\n\t" 10330 "pextrb $tmp2,$tmp, 0x3\n\t" 10331 "orl $dst,$tmp2\n\t" 10332 "movsbl $dst,$dst\t! 
or reduction8B" %} 10333 ins_encode %{ 10334 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 10335 __ por($tmp$$XMMRegister, $src2$$XMMRegister); 10336 __ movzbl($dst$$Register, $src1$$Register); 10337 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0); 10338 __ orl($dst$$Register, $tmp2$$Register); 10339 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1); 10340 __ orl($dst$$Register, $tmp2$$Register); 10341 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2); 10342 __ orl($dst$$Register, $tmp2$$Register); 10343 __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3); 10344 __ orl($dst$$Register, $tmp2$$Register); 10345 __ movsbl($dst$$Register, $dst$$Register); 10346 %} 10347 ins_pipe( pipe_slow ); 10348 %} 10349 10350 instruct rsor16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10351 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10352 match(Set dst (OrReductionV src1 src2)); 10353 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10354 format %{ "pshufd $tmp,$src2,0xE\n\t" 10355 "por $tmp,$src2\n\t" 10356 "pshufd $tmp2,$tmp,0x1\n\t" 10357 "por $tmp,$tmp,$tmp2\n\t" 10358 "movzbl $dst,$src1\n\t" 10359 "pextrb $tmp3,$tmp, 0x0\n\t" 10360 "orl $dst,$tmp3\n\t" 10361 "pextrb $tmp3,$tmp, 0x1\n\t" 10362 "orl $dst,$tmp3\n\t" 10363 "pextrb $tmp3,$tmp, 0x2\n\t" 10364 "orl $dst,$tmp3\n\t" 10365 "pextrb $tmp3,$tmp, 0x3\n\t" 10366 "orl $dst,$tmp3\n\t" 10367 "movsbl $dst,$dst\t! 
or reduction16B" %} 10368 ins_encode %{ 10369 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 10370 __ por($tmp$$XMMRegister, $src2$$XMMRegister); 10371 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 10372 __ por($tmp$$XMMRegister, $tmp2$$XMMRegister); 10373 __ movzbl($dst$$Register, $src1$$Register); 10374 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 10375 __ orl($dst$$Register, $tmp3$$Register); 10376 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 10377 __ orl($dst$$Register, $tmp3$$Register); 10378 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 10379 __ orl($dst$$Register, $tmp3$$Register); 10380 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 10381 __ orl($dst$$Register, $tmp3$$Register); 10382 __ movsbl($dst$$Register, $dst$$Register); 10383 %} 10384 ins_pipe( pipe_slow ); 10385 %} 10386 10387 instruct rvor32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10388 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10389 match(Set dst (OrReductionV src1 src2)); 10390 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10391 format %{ "vextracti128_high $tmp,$src2\n\t" 10392 "vpor $tmp,$tmp,$src2\n\t" 10393 "pshufd $tmp2,$tmp,0xE\n\t" 10394 "vpor $tmp,$tmp,$tmp2\n\t" 10395 "pshufd $tmp2,$tmp,0x1\n\t" 10396 "vpor $tmp,$tmp,$tmp2\n\t" 10397 "movzbl $dst,$src1\n\t" 10398 "pextrb $tmp3,$tmp, 0x0\n\t" 10399 "orl $dst,$tmp3\n\t" 10400 "pextrb $tmp3,$tmp, 0x1\n\t" 10401 "orl $dst,$tmp3\n\t" 10402 "pextrb $tmp3,$tmp, 0x2\n\t" 10403 "orl $dst,$tmp3\n\t" 10404 "pextrb $tmp3,$tmp, 0x3\n\t" 10405 "orl $dst,$tmp3\n\t" 10406 "movsbl $dst,$dst\t! 
or reduction32B" %} 10407 ins_encode %{ 10408 int vector_len = 0; 10409 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 10410 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 10411 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 10412 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10413 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 10414 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10415 __ movzbl($dst$$Register, $src1$$Register); 10416 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 10417 __ orl($dst$$Register, $tmp3$$Register); 10418 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 10419 __ orl($dst$$Register, $tmp3$$Register); 10420 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 10421 __ orl($dst$$Register, $tmp3$$Register); 10422 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 10423 __ orl($dst$$Register, $tmp3$$Register); 10424 __ movsbl($dst$$Register, $dst$$Register); 10425 %} 10426 ins_pipe( pipe_slow ); 10427 %} 10428 10429 instruct rvor64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10430 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10431 match(Set dst (OrReductionV src1 src2)); 10432 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10433 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 10434 "vpor $tmp2,$tmp2,$src2\n\t" 10435 "vextracti128_high $tmp,$tmp2\n\t" 10436 "vpor $tmp,$tmp,$tmp2\n\t" 10437 "pshufd $tmp2,$tmp,0xE\n\t" 10438 "vpor $tmp,$tmp,$tmp2\n\t" 10439 "pshufd $tmp2,$tmp,0x1\n\t" 10440 "vpor $tmp,$tmp,$tmp2\n\t" 10441 "movzbl $dst,$src1\n\t" 10442 "movdl $tmp3,$tmp\n\t" 10443 "orl $dst,$tmp3\n\t" 10444 "shrl $tmp3,0x8\n\t" 10445 "orl $dst,$tmp3\n\t" 10446 "shrl $tmp3,0x8\n\t" 10447 "orl $dst,$tmp3\n\t" 10448 "shrl $tmp3,0x8\n\t" 10449 "orl $dst,$tmp3\n\t" 10450 "movsbl $dst,$dst\t! 
or reduction64B" %} 10451 ins_encode %{ 10452 int vector_len = 0; 10453 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 10454 __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 10455 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 10456 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10457 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 10458 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10459 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 10460 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10461 __ movzbl($dst$$Register, $src1$$Register); 10462 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 10463 __ orl($dst$$Register, $tmp3$$Register); 10464 __ shrl($tmp3$$Register, 8); 10465 __ orl($dst$$Register, $tmp3$$Register); 10466 __ shrl($tmp3$$Register, 8); 10467 __ orl($dst$$Register, $tmp3$$Register); 10468 __ shrl($tmp3$$Register, 8); 10469 __ orl($dst$$Register, $tmp3$$Register); 10470 __ movsbl($dst$$Register, $dst$$Register); 10471 %} 10472 ins_pipe( pipe_slow ); 10473 %} 10474 10475 instruct rsor4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 10476 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10477 match(Set dst (OrReductionV src1 src2)); 10478 effect(TEMP tmp, TEMP tmp2, TEMP dst); 10479 format %{ 10480 "pshufd $tmp,$src2,0x1\n\t" 10481 "por $tmp,$src2\n\t" 10482 "movzwl $dst,$src1\n\t" 10483 "pextrw $tmp2,$tmp, 0x0\n\t" 10484 "orw $dst,$tmp2\n\t" 10485 "pextrw $tmp2,$tmp, 0x1\n\t" 10486 "orw $dst,$tmp2\n\t" 10487 "movswl $dst,$dst\t! 
or reduction4S" %} 10488 ins_encode %{ 10489 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1); 10490 __ por($tmp$$XMMRegister, $src2$$XMMRegister); 10491 __ movzwl($dst$$Register, $src1$$Register); 10492 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0); 10493 __ orw($dst$$Register, $tmp2$$Register); 10494 __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1); 10495 __ orw($dst$$Register, $tmp2$$Register); 10496 __ movswl($dst$$Register, $dst$$Register); 10497 %} 10498 ins_pipe( pipe_slow ); 10499 %} 10500 10501 instruct rsor8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10502 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10503 match(Set dst (OrReductionV src1 src2)); 10504 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10505 format %{ "pshufd $tmp,$src2,0xE\n\t" 10506 "por $tmp,$src2\n\t" 10507 "pshufd $tmp2,$tmp,0x1\n\t" 10508 "por $tmp,$tmp,$tmp2\n\t" 10509 "movzwl $dst,$src1\n\t" 10510 "pextrw $tmp3,$tmp, 0x0\n\t" 10511 "orw $dst,$tmp3\n\t" 10512 "pextrw $tmp3,$tmp, 0x1\n\t" 10513 "orw $dst,$tmp3\n\t" 10514 "movswl $dst,$dst\t! 
or reduction8S" %} 10515 ins_encode %{ 10516 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 10517 __ por($tmp$$XMMRegister, $src2$$XMMRegister); 10518 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 10519 __ por($tmp$$XMMRegister, $tmp2$$XMMRegister); 10520 __ movzwl($dst$$Register, $src1$$Register); 10521 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 10522 __ orw($dst$$Register, $tmp3$$Register); 10523 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); 10524 __ orw($dst$$Register, $tmp3$$Register); 10525 __ movswl($dst$$Register, $dst$$Register); 10526 %} 10527 ins_pipe( pipe_slow ); 10528 %} 10529 10530 instruct rvor16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10531 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10532 match(Set dst (OrReductionV src1 src2)); 10533 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10534 format %{ "vextracti128_high $tmp,$src2\n\t" 10535 "vpor $tmp,$tmp,$src2\n\t" 10536 "pshufd $tmp2,$tmp,0xE\n\t" 10537 "vpor $tmp,$tmp,$tmp2\n\t" 10538 "pshufd $tmp2,$tmp,0x1\n\t" 10539 "vpor $tmp,$tmp,$tmp2\n\t" 10540 "movzwl $dst,$src1\n\t" 10541 "pextrw $tmp3,$tmp, 0x0\n\t" 10542 "orw $dst,$tmp3\n\t" 10543 "pextrw $tmp3,$tmp, 0x1\n\t" 10544 "orw $dst,$tmp3\n\t" 10545 "movswl $dst,$dst\t! 
or reduction16S" %} 10546 ins_encode %{ 10547 int vector_len = 0; 10548 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 10549 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 10550 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 10551 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10552 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 10553 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10554 __ movzwl($dst$$Register, $src1$$Register); 10555 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 10556 __ orw($dst$$Register, $tmp3$$Register); 10557 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); 10558 __ orw($dst$$Register, $tmp3$$Register); 10559 __ movswl($dst$$Register, $dst$$Register); 10560 %} 10561 ins_pipe( pipe_slow ); 10562 %} 10563 10564 instruct rvor32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10565 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10566 match(Set dst (OrReductionV src1 src2)); 10567 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10568 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 10569 "vpor $tmp2,$tmp2,$src2\n\t" 10570 "vextracti128_high $tmp,$tmp2\n\t" 10571 "vpor $tmp,$tmp,$tmp2\n\t" 10572 "pshufd $tmp2,$tmp,0xE\n\t" 10573 "vpor $tmp,$tmp,$tmp2\n\t" 10574 "pshufd $tmp2,$tmp,0x1\n\t" 10575 "vpor $tmp,$tmp,$tmp2\n\t" 10576 "movzwl $dst,$src1\n\t" 10577 "movdl $tmp3,$tmp\n\t" 10578 "orw $dst,$tmp3\n\t" 10579 "shrl $tmp3,0x16\n\t" 10580 "orw $dst,$tmp3\n\t" 10581 "movswl $dst,$dst\t! 
or reduction32S" %} 10582 ins_encode %{ 10583 int vector_len = 0; 10584 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 10585 __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 10586 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 10587 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10588 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 10589 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10590 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 10591 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10592 __ movzwl($dst$$Register, $src1$$Register); 10593 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 10594 __ orw($dst$$Register, $tmp3$$Register); 10595 __ shrl($tmp3$$Register, 16); 10596 __ orw($dst$$Register, $tmp3$$Register); 10597 __ movswl($dst$$Register, $dst$$Register); 10598 %} 10599 ins_pipe( pipe_slow ); 10600 %} 10601 10602 instruct rsor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 10603 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 10604 match(Set dst (OrReductionV src1 src2)); 10605 effect(TEMP tmp, TEMP tmp2); 10606 format %{ "pshufd $tmp2,$src2,0x1\n\t" 10607 "por $tmp2,$src2\n\t" 10608 "movd $tmp,$src1\n\t" 10609 "por $tmp2,$tmp\n\t" 10610 "movd $dst,$tmp2\t! 
or reduction2I" %} 10611 ins_encode %{ 10612 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 10613 __ por($tmp2$$XMMRegister, $src2$$XMMRegister); 10614 __ movdl($tmp$$XMMRegister, $src1$$Register); 10615 __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); 10616 __ movdl($dst$$Register, $tmp2$$XMMRegister); 10617 %} 10618 ins_pipe( pipe_slow ); 10619 %} 10620 10621 instruct rsor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 10622 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 10623 match(Set dst (OrReductionV src1 src2)); 10624 effect(TEMP tmp, TEMP tmp2); 10625 format %{ "pshufd $tmp2,$src2,0xE\n\t" 10626 "por $tmp2,$src2\n\t" 10627 "pshufd $tmp,$tmp2,0x1\n\t" 10628 "por $tmp2,$tmp\n\t" 10629 "movd $tmp,$src1\n\t" 10630 "por $tmp2,$tmp\n\t" 10631 "movd $dst,$tmp2\t! or reduction4I" %} 10632 ins_encode %{ 10633 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 10634 __ por($tmp2$$XMMRegister, $src2$$XMMRegister); 10635 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 10636 __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); 10637 __ movdl($tmp$$XMMRegister, $src1$$Register); 10638 __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); 10639 __ movdl($dst$$Register, $tmp2$$XMMRegister); 10640 %} 10641 ins_pipe( pipe_slow ); 10642 %} 10643 10644 instruct rvor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 10645 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 10646 match(Set dst (OrReductionV src1 src2)); 10647 effect(TEMP tmp, TEMP tmp2); 10648 format %{ "vextracti128_high $tmp,$src2\n\t" 10649 "vpor $tmp,$tmp,$src2\n\t" 10650 "vpshufd $tmp2,$tmp,0xE\t" 10651 "vpor $tmp,$tmp,$tmp2\n\t" 10652 "vpshufd $tmp2,$tmp,0x1\t" 10653 "vpor $tmp,$tmp,$tmp2\n\t" 10654 "movd $tmp2,$src1\n\t" 10655 "vpor $tmp2,$tmp,$tmp2\n\t" 10656 "movd $dst,$tmp2\t! 
or reduction8I" %} 10657 ins_encode %{ 10658 int vector_len = 0; 10659 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 10660 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 10661 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 10662 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10663 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 10664 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10665 __ movdl($tmp2$$XMMRegister, $src1$$Register); 10666 __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10667 __ movdl($dst$$Register, $tmp2$$XMMRegister); 10668 %} 10669 ins_pipe( pipe_slow ); 10670 %} 10671 10672 instruct rvor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 10673 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 10674 match(Set dst (OrReductionV src1 src2)); 10675 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 10676 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 10677 "vpor $tmp3,$tmp3,$src2\n\t" 10678 "vextracti128_high $tmp,$tmp3\n\t" 10679 "vpor $tmp,$tmp,$src2\n\t" 10680 "vpshufd $tmp2,$tmp,0xE\t" 10681 "vpor $tmp,$tmp,$tmp2\n\t" 10682 "vpshufd $tmp2,$tmp,0x1\t" 10683 "vpor $tmp,$tmp,$tmp2\n\t" 10684 "movd $tmp2,$src1\n\t" 10685 "vpor $tmp2,$tmp,$tmp2\n\t" 10686 "movd $dst,$tmp2\t! 
or reduction16I" %} 10687 ins_encode %{ 10688 int vector_len = 0; 10689 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 10690 __ vpor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 10691 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 10692 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); 10693 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 10694 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10695 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 10696 __ vpor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10697 __ movdl($tmp2$$XMMRegister, $src1$$Register); 10698 __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10699 __ movdl($dst$$Register, $tmp2$$XMMRegister); 10700 %} 10701 ins_pipe( pipe_slow ); 10702 %} 10703 10704 instruct rsor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 10705 predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10706 match(Set dst (OrReductionV src1 src2)); 10707 effect(TEMP tmp, TEMP tmp2); 10708 format %{ "pshufd $tmp2,$src2,0xE\n\t" 10709 "por $tmp2,$src2\n\t" 10710 "movdq $tmp,$src1\n\t" 10711 "por $tmp2,$tmp\n\t" 10712 "movq $dst,$tmp2\t! 
or reduction2L" %} 10713 ins_encode %{ 10714 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 10715 __ por($tmp2$$XMMRegister, $src2$$XMMRegister); 10716 __ movdq($tmp$$XMMRegister, $src1$$Register); 10717 __ por($tmp2$$XMMRegister, $tmp$$XMMRegister); 10718 __ movq($dst$$Register, $tmp2$$XMMRegister); 10719 %} 10720 ins_pipe( pipe_slow ); 10721 %} 10722 10723 instruct rvor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 10724 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10725 match(Set dst (OrReductionV src1 src2)); 10726 effect(TEMP tmp, TEMP tmp2); 10727 format %{ "vextracti128_high $tmp,$src2\n\t" 10728 "vpor $tmp2,$tmp,$src2\n\t" 10729 "vpshufd $tmp,$tmp2,0xE\t" 10730 "vpor $tmp2,$tmp2,$tmp\n\t" 10731 "movq $tmp,$src1\n\t" 10732 "vpor $tmp2,$tmp2,$tmp\n\t" 10733 "movq $dst,$tmp2\t! or reduction4L" %} 10734 ins_encode %{ 10735 int vector_len = 0; 10736 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 10737 __ vpor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 10738 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); 10739 __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 10740 __ movq($tmp$$XMMRegister, $src1$$Register); 10741 __ vpor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 10742 __ movq($dst$$Register, $tmp2$$XMMRegister); 10743 %} 10744 ins_pipe( pipe_slow ); 10745 %} 10746 10747 #ifdef _LP64 10748 instruct rvor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 10749 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 10750 match(Set dst (OrReductionV src1 src2)); 10751 effect(TEMP tmp, TEMP tmp2); 10752 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 10753 "vporq $tmp2,$tmp2,$src2\n\t" 10754 "vextracti128_high $tmp,$tmp2\n\t" 10755 "vporq $tmp2,$tmp2,$tmp\n\t" 10756 "vpshufd $tmp,$tmp2,0xE\t" 10757 
"vporq $tmp2,$tmp2,$tmp\n\t" 10758 "movdq $tmp,$src1\n\t" 10759 "vporq $tmp2,$tmp2,$tmp\n\t" 10760 "movdq $dst,$tmp2\t! or reduction8L" %} 10761 ins_encode %{ 10762 int vector_len = 0; 10763 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 10764 __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 10765 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 10766 __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 10767 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); 10768 __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 10769 __ movdq($tmp$$XMMRegister, $src1$$Register); 10770 __ vporq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 10771 __ movdq($dst$$Register, $tmp2$$XMMRegister); 10772 %} 10773 ins_pipe( pipe_slow ); 10774 %} 10775 #endif 10776 10777 instruct rsxor8B_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 10778 predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10779 match(Set dst (XorReductionV src1 src2)); 10780 effect(TEMP tmp, TEMP tmp2, TEMP dst); 10781 format %{ 10782 "pshufd $tmp,$src2,0x1\n\t" 10783 "pxor $tmp,$src2\n\t" 10784 "movzbl $dst,$src1\n\t" 10785 "pextrb $tmp2,$tmp, 0x0\n\t" 10786 "xorl $dst,$tmp2\n\t" 10787 "pextrb $tmp2,$tmp, 0x1\n\t" 10788 "xorl $dst,$tmp2\n\t" 10789 "pextrb $tmp2,$tmp, 0x2\n\t" 10790 "xorl $dst,$tmp2\n\t" 10791 "pextrb $tmp2,$tmp, 0x3\n\t" 10792 "xorl $dst,$tmp2\n\t" 10793 "movsbl $dst,$dst\t! 
xor reduction8B" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pxor($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzbl($dst$$Register, $src1$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ xorl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ xorl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x2);
    __ xorl($dst$$Register, $tmp2$$Register);
    __ pextrb($tmp2$$Register, $tmp$$XMMRegister, 0x3);
    __ xorl($dst$$Register, $tmp2$$Register);
    __ movsbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR reduction of a 16-byte vector (UseSSE > 3, needs pextrb): fold the high
// 64 bits onto the low half with pshufd/pxor, fold again to 32 bits, then xor
// the four remaining bytes into the scalar seed $src1 and sign-extend the
// byte result into $dst.
instruct rsxor16B_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 3 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  // Note: SSE pxor is a two-operand instruction; the second pxor line below
  // previously printed a bogus three-operand form "pxor $tmp,$tmp,$tmp2".
  // The encoding emits pxor(tmp, tmp2).
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pxor $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pxor $tmp,$tmp2\n\t"
            "movzbl $dst,$src1\n\t"
            "pextrb $tmp3,$tmp, 0x0\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x1\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x2\n\t"
            "xorl $dst,$tmp3\n\t"
            "pextrb $tmp3,$tmp, 0x3\n\t"
            "xorl $dst,$tmp3\n\t"
            "movsbl $dst,$dst\t! 
xor reduction16B" %} 10829 ins_encode %{ 10830 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 10831 __ pxor($tmp$$XMMRegister, $src2$$XMMRegister); 10832 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 10833 __ pxor($tmp$$XMMRegister, $tmp2$$XMMRegister); 10834 __ movzbl($dst$$Register, $src1$$Register); 10835 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 10836 __ xorl($dst$$Register, $tmp3$$Register); 10837 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 10838 __ xorl($dst$$Register, $tmp3$$Register); 10839 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 10840 __ xorl($dst$$Register, $tmp3$$Register); 10841 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 10842 __ xorl($dst$$Register, $tmp3$$Register); 10843 __ movsbl($dst$$Register, $dst$$Register); 10844 %} 10845 ins_pipe( pipe_slow ); 10846 %} 10847 10848 instruct rvxor32B_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10849 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10850 match(Set dst (XorReductionV src1 src2)); 10851 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10852 format %{ "vextracti128_high $tmp,$src2\n\t" 10853 "vpxor $tmp,$tmp,$src2\n\t" 10854 "pshufd $tmp2,$tmp,0xE\n\t" 10855 "vpxor $tmp,$tmp,$tmp2\n\t" 10856 "pshufd $tmp2,$tmp,0x1\n\t" 10857 "vpxor $tmp,$tmp,$tmp2\n\t" 10858 "movzbl $dst,$src1\n\t" 10859 "pextrb $tmp3,$tmp, 0x0\n\t" 10860 "xorl $dst,$tmp3\n\t" 10861 "pextrb $tmp3,$tmp, 0x1\n\t" 10862 "xorl $dst,$tmp3\n\t" 10863 "pextrb $tmp3,$tmp, 0x2\n\t" 10864 "xorl $dst,$tmp3\n\t" 10865 "pextrb $tmp3,$tmp, 0x3\n\t" 10866 "xorl $dst,$tmp3\n\t" 10867 "movsbl $dst,$dst\t! 
xor reduction32B" %} 10868 ins_encode %{ 10869 int vector_len = 0; 10870 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 10871 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 10872 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 10873 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10874 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 10875 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10876 __ movzbl($dst$$Register, $src1$$Register); 10877 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x0); 10878 __ xorl($dst$$Register, $tmp3$$Register); 10879 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x1); 10880 __ xorl($dst$$Register, $tmp3$$Register); 10881 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x2); 10882 __ xorl($dst$$Register, $tmp3$$Register); 10883 __ pextrb($tmp3$$Register, $tmp$$XMMRegister, 0x3); 10884 __ xorl($dst$$Register, $tmp3$$Register); 10885 __ movsbl($dst$$Register, $dst$$Register); 10886 %} 10887 ins_pipe( pipe_slow ); 10888 %} 10889 10890 instruct rvxor64B_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10891 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 10892 match(Set dst (XorReductionV src1 src2)); 10893 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10894 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 10895 "vpxor $tmp2,$tmp2,$src2\n\t" 10896 "vextracti128_high $tmp,$tmp2\n\t" 10897 "vpxor $tmp,$tmp,$tmp2\n\t" 10898 "pshufd $tmp2,$tmp,0xE\n\t" 10899 "vpxor $tmp,$tmp,$tmp2\n\t" 10900 "pshufd $tmp2,$tmp,0x1\n\t" 10901 "vpxor $tmp,$tmp,$tmp2\n\t" 10902 "movzbl $dst,$src1\n\t" 10903 "movdl $tmp3,$tmp\n\t" 10904 "xorl $dst,$tmp3\n\t" 10905 "shrl $tmp3,0x8\n\t" 10906 "xorl $dst,$tmp3\n\t" 10907 "shrl $tmp3,0x8\n\t" 10908 "xorl $dst,$tmp3\n\t" 10909 "shrl $tmp3,0x8\n\t" 10910 "xorl $dst,$tmp3\n\t" 10911 "movsbl 
$dst,$dst\t! xor reduction64B" %} 10912 ins_encode %{ 10913 int vector_len = 0; 10914 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 10915 __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 10916 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 10917 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10918 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 10919 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10920 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 10921 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 10922 __ movzbl($dst$$Register, $src1$$Register); 10923 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 10924 __ xorl($dst$$Register, $tmp3$$Register); 10925 __ shrl($tmp3$$Register, 8); 10926 __ xorl($dst$$Register, $tmp3$$Register); 10927 __ shrl($tmp3$$Register, 8); 10928 __ xorl($dst$$Register, $tmp3$$Register); 10929 __ shrl($tmp3$$Register, 8); 10930 __ xorl($dst$$Register, $tmp3$$Register); 10931 __ movsbl($dst$$Register, $dst$$Register); 10932 %} 10933 ins_pipe( pipe_slow ); 10934 %} 10935 10936 instruct rsxor4S_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, rRegI tmp2) %{ 10937 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10938 match(Set dst (XorReductionV src1 src2)); 10939 effect(TEMP tmp, TEMP tmp2, TEMP dst); 10940 format %{ 10941 "pshufd $tmp,$src2,0x1\n\t" 10942 "pxor $tmp,$src2\n\t" 10943 "movzwl $dst,$src1\n\t" 10944 "pextrw $tmp2,$tmp, 0x0\n\t" 10945 "xorw $dst,$tmp2\n\t" 10946 "pextrw $tmp2,$tmp, 0x1\n\t" 10947 "xorw $dst,$tmp2\n\t" 10948 "movswl $dst,$dst\t! 
xor reduction4S" %}
  ins_encode %{
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ pxor($tmp$$XMMRegister, $src2$$XMMRegister);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x0);
    __ xorw($dst$$Register, $tmp2$$Register);
    __ pextrw($tmp2$$Register, $tmp$$XMMRegister, 0x1);
    __ xorw($dst$$Register, $tmp2$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR reduction of an 8-short vector (UseSSE > 1): fold 128 -> 64 -> 32 bits
// with pshufd/pxor, then xor the two remaining 16-bit lanes into the scalar
// seed $src1 and sign-extend the short result into $dst.
instruct rsxor8S_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  // Note: SSE pxor is a two-operand instruction; the second pxor line below
  // previously printed a bogus three-operand form "pxor $tmp,$tmp,$tmp2".
  // The encoding emits pxor(tmp, tmp2).
  format %{ "pshufd $tmp,$src2,0xE\n\t"
            "pxor $tmp,$src2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "pxor $tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "pextrw $tmp3,$tmp, 0x0\n\t"
            "xorw $dst,$tmp3\n\t"
            "pextrw $tmp3,$tmp, 0x1\n\t"
            "xorw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! 
xor reduction8S" %} 10976 ins_encode %{ 10977 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 10978 __ pxor($tmp$$XMMRegister, $src2$$XMMRegister); 10979 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 10980 __ pxor($tmp$$XMMRegister, $tmp2$$XMMRegister); 10981 __ movzwl($dst$$Register, $src1$$Register); 10982 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0); 10983 __ xorw($dst$$Register, $tmp3$$Register); 10984 __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1); 10985 __ xorw($dst$$Register, $tmp3$$Register); 10986 __ movswl($dst$$Register, $dst$$Register); 10987 %} 10988 ins_pipe( pipe_slow ); 10989 %} 10990 10991 instruct rvxor16S_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2, rRegI tmp3) %{ 10992 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 10993 match(Set dst (XorReductionV src1 src2)); 10994 effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst); 10995 format %{ "vextracti128_high $tmp,$src2\n\t" 10996 "vpxor $tmp,$tmp,$src2\n\t" 10997 "pshufd $tmp2,$tmp,0xE\n\t" 10998 "vpxor $tmp,$tmp,$tmp2\n\t" 10999 "pshufd $tmp2,$tmp,0x1\n\t" 11000 "vpxor $tmp,$tmp,$tmp2\n\t" 11001 "movzwl $dst,$src1\n\t" 11002 "pextrw $tmp3,$tmp, 0x0\n\t" 11003 "xorw $dst,$tmp3\n\t" 11004 "pextrw $tmp3,$tmp, 0x1\n\t" 11005 "xorw $dst,$tmp3\n\t" 11006 "movswl $dst,$dst\t! 
xor reduction16S" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movzwl($dst$$Register, $src1$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x0);
    __ xorw($dst$$Register, $tmp3$$Register);
    __ pextrw($tmp3$$Register, $tmp$$XMMRegister, 0x1);
    __ xorw($dst$$Register, $tmp3$$Register);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// XOR reduction of a 32-short (512-bit) vector, AVX-512 (UseAVX > 2): fold
// 512 -> 256 -> 128 -> 64 -> 32 bits with vector xors, then combine the final
// two 16-bit lanes with the scalar seed $src1 via movdl + shrl and
// sign-extend the short result into $dst.
instruct rvxor32S_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, rRegI tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3, TEMP dst);
  // Note: the shift amount below previously printed as "0x16" (= 22); the
  // encoding emits shrl($tmp3, 16), i.e. 0x10.
  format %{ "vextracti64x4_high $tmp2,$src2\n\t"
            "vpxor $tmp2,$tmp2,$src2\n\t"
            "vextracti128_high $tmp,$tmp2\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0xE\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "pshufd $tmp2,$tmp,0x1\n\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "movzwl $dst,$src1\n\t"
            "movdl $tmp3,$tmp\n\t"
            "xorw $dst,$tmp3\n\t"
            "shrl $tmp3,0x10\n\t"
            "xorw $dst,$tmp3\n\t"
            "movswl $dst,$dst\t! 
xor reduction32S" %} 11043 ins_encode %{ 11044 int vector_len = 0; 11045 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 11046 __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 11047 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 11048 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11049 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 11050 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11051 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 11052 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11053 __ movzwl($dst$$Register, $src1$$Register); 11054 __ movdl($tmp3$$Register, $tmp$$XMMRegister); 11055 __ xorw($dst$$Register, $tmp3$$Register); 11056 __ shrl($tmp3$$Register, 16); 11057 __ xorw($dst$$Register, $tmp3$$Register); 11058 __ movswl($dst$$Register, $dst$$Register); 11059 %} 11060 ins_pipe( pipe_slow ); 11061 %} 11062 11063 instruct rsxor2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 11064 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11065 match(Set dst (XorReductionV src1 src2)); 11066 effect(TEMP tmp, TEMP tmp2); 11067 format %{ "pshufd $tmp2,$src2,0x1\n\t" 11068 "pxor $tmp2,$src2\n\t" 11069 "movd $tmp,$src1\n\t" 11070 "pxor $tmp2,$tmp\n\t" 11071 "movd $dst,$tmp2\t! 
xor reduction2I" %} 11072 ins_encode %{ 11073 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 11074 __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); 11075 __ movdl($tmp$$XMMRegister, $src1$$Register); 11076 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 11077 __ movdl($dst$$Register, $tmp2$$XMMRegister); 11078 %} 11079 ins_pipe( pipe_slow ); 11080 %} 11081 11082 instruct rsxor4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 11083 predicate(UseSSE > 1 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11084 match(Set dst (XorReductionV src1 src2)); 11085 effect(TEMP tmp, TEMP tmp2); 11086 format %{ "pshufd $tmp2,$src2,0xE\n\t" 11087 "pxor $tmp2,$src2\n\t" 11088 "pshufd $tmp,$tmp2,0x1\n\t" 11089 "pxor $tmp2,$tmp\n\t" 11090 "movd $tmp,$src1\n\t" 11091 "pxor $tmp2,$tmp\n\t" 11092 "movd $dst,$tmp2\t! xor reduction4I" %} 11093 ins_encode %{ 11094 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 11095 __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); 11096 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 11097 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 11098 __ movdl($tmp$$XMMRegister, $src1$$Register); 11099 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 11100 __ movdl($dst$$Register, $tmp2$$XMMRegister); 11101 %} 11102 ins_pipe( pipe_slow ); 11103 %} 11104 11105 instruct rvxor8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 11106 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); 11107 match(Set dst (XorReductionV src1 src2)); 11108 effect(TEMP tmp, TEMP tmp2); 11109 format %{ "vextracti128_high $tmp,$src2\n\t" 11110 "vpxor $tmp,$tmp,$src2\n\t" 11111 "vpshufd $tmp2,$tmp,0xE\t" 11112 "vpxor $tmp,$tmp,$tmp2\n\t" 11113 "vpshufd $tmp2,$tmp,0x1\t" 11114 "vpxor $tmp,$tmp,$tmp2\n\t" 11115 "movd $tmp2,$src1\n\t" 11116 "vpxor $tmp2,$tmp,$tmp2\n\t" 11117 "movd $dst,$tmp2\t! 
xor reduction8I" %}
  ins_encode %{
    int vector_len = 0;
    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len);
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($tmp2$$XMMRegister, $src1$$Register);
    __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ movdl($dst$$Register, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XOR reduction of a 16-int (512-bit) vector, AVX-512 (UseAVX > 2): fold
// 512 -> 256 -> 128 -> 64 -> 32 bits with vector xors, then xor in the
// scalar seed $src1 and move the 32-bit result to $dst.
instruct rvxor16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
  predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
  // Note: the second vpxor line below previously printed "$tmp,$tmp,$src2";
  // the encoding xors $tmp with $tmp3 (the already-folded 256-bit half),
  // not with $src2.
  format %{ "vextracti64x4_high $tmp3,$src2\n\t"
            "vpxor $tmp3,$tmp3,$src2\n\t"
            "vextracti128_high $tmp,$tmp3\n\t"
            "vpxor $tmp,$tmp,$tmp3\n\t"
            "vpshufd $tmp2,$tmp,0xE\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "vpshufd $tmp2,$tmp,0x1\t"
            "vpxor $tmp,$tmp,$tmp2\n\t"
            "movd $tmp2,$src1\n\t"
            "vpxor $tmp2,$tmp,$tmp2\n\t"
            "movd $dst,$tmp2\t! 
xor reduction16I" %} 11148 ins_encode %{ 11149 int vector_len = 0; 11150 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 11151 __ vpxor($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 11152 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 11153 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, vector_len); 11154 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE, vector_len); 11155 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11156 __ vpshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1, vector_len); 11157 __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11158 __ movdl($tmp2$$XMMRegister, $src1$$Register); 11159 __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 11160 __ movdl($dst$$Register, $tmp2$$XMMRegister); 11161 %} 11162 ins_pipe( pipe_slow ); 11163 %} 11164 11165 instruct rsxor2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 11166 predicate(UseSSE >= 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11167 match(Set dst (XorReductionV src1 src2)); 11168 effect(TEMP tmp, TEMP tmp2); 11169 format %{ "pshufd $tmp2,$src2,0xE\n\t" 11170 "pxor $tmp2,$src2\n\t" 11171 "movdq $tmp,$src1\n\t" 11172 "pxor $tmp2,$tmp\n\t" 11173 "movq $dst,$tmp2\t! 
xor reduction2L" %} 11174 ins_encode %{ 11175 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 11176 __ pxor($tmp2$$XMMRegister, $src2$$XMMRegister); 11177 __ movdq($tmp$$XMMRegister, $src1$$Register); 11178 __ pxor($tmp2$$XMMRegister, $tmp$$XMMRegister); 11179 __ movq($dst$$Register, $tmp2$$XMMRegister); 11180 %} 11181 ins_pipe( pipe_slow ); 11182 %} 11183 11184 instruct rvxor4L_reduction_reg_avx(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 11185 predicate(UseAVX > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11186 match(Set dst (XorReductionV src1 src2)); 11187 effect(TEMP tmp, TEMP tmp2); 11188 format %{ "vextracti128_high $tmp,$src2\n\t" 11189 "vpxor $tmp2,$tmp,$src2\n\t" 11190 "vpshufd $tmp,$tmp2,0xE\t" 11191 "vpxor $tmp2,$tmp2,$tmp\n\t" 11192 "movq $tmp,$src1\n\t" 11193 "vpxor $tmp2,$tmp2,$tmp\n\t" 11194 "movq $dst,$tmp2\t! xor reduction4L" %} 11195 ins_encode %{ 11196 int vector_len = 0; 11197 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 11198 __ vpxor($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 11199 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); 11200 __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 11201 __ movq($tmp$$XMMRegister, $src1$$Register); 11202 __ vpxor($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 11203 __ movq($dst$$Register, $tmp2$$XMMRegister); 11204 %} 11205 ins_pipe( pipe_slow ); 11206 %} 11207 11208 #ifdef _LP64 11209 instruct rvxor8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 11210 predicate(UseAVX > 2 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); 11211 match(Set dst (XorReductionV src1 src2)); 11212 effect(TEMP tmp, TEMP tmp2); 11213 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 11214 "vpxorq $tmp2,$tmp2,$src2\n\t" 11215 "vextracti128_high $tmp,$tmp2\n\t" 11216 "vpxorq $tmp2,$tmp2,$tmp\n\t" 11217 "vpshufd 
$tmp,$tmp2,0xE\t" 11218 "vpxorq $tmp2,$tmp2,$tmp\n\t" 11219 "movdq $tmp,$src1\n\t" 11220 "vpxorq $tmp2,$tmp2,$tmp\n\t" 11221 "movdq $dst,$tmp2\t! xor reduction8L" %} 11222 ins_encode %{ 11223 int vector_len = 0; 11224 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 11225 __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 11226 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 11227 __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 11228 __ vpshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE, vector_len); 11229 __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 11230 __ movdq($tmp$$XMMRegister, $src1$$Register); 11231 __ vpxorq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 11232 __ movdq($dst$$Register, $tmp2$$XMMRegister); 11233 %} 11234 ins_pipe( pipe_slow ); 11235 %} 11236 #endif 11237 11238 // ====================VECTOR ARITHMETIC======================================= 11239 11240 // --------------------------------- ADD -------------------------------------- 11241 11242 // Bytes vector add 11243 instruct vadd4B(vecS dst, vecS src) %{ 11244 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 11245 match(Set dst (AddVB dst src)); 11246 format %{ "paddb $dst,$src\t! add packed4B" %} 11247 ins_encode %{ 11248 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 11249 %} 11250 ins_pipe( pipe_slow ); 11251 %} 11252 11253 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 11254 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 11255 match(Set dst (AddVB src1 src2)); 11256 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed4B" %} 11257 ins_encode %{ 11258 int vector_len = 0; 11259 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11260 %} 11261 ins_pipe( pipe_slow ); 11262 %} 11263 11264 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 11265 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 11266 match(Set dst (AddVB src1 src2)); 11267 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 11268 ins_encode %{ 11269 int vector_len = 0; 11270 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11271 %} 11272 ins_pipe( pipe_slow ); 11273 %} 11274 11275 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 11276 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 11277 match(Set dst (AddVB dst src2)); 11278 effect(TEMP src1); 11279 format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} 11280 ins_encode %{ 11281 int vector_len = 0; 11282 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11283 %} 11284 ins_pipe( pipe_slow ); 11285 %} 11286 11287 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ 11288 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 11289 match(Set dst (AddVB src (LoadVector mem))); 11290 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 11291 ins_encode %{ 11292 int vector_len = 0; 11293 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11294 %} 11295 ins_pipe( pipe_slow ); 11296 %} 11297 11298 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ 11299 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 11300 match(Set dst (AddVB src (LoadVector mem))); 11301 format %{ "vpaddb $dst,$src,$mem\t! 
add packed4B" %} 11302 ins_encode %{ 11303 int vector_len = 0; 11304 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11305 %} 11306 ins_pipe( pipe_slow ); 11307 %} 11308 11309 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 11310 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 11311 match(Set dst (AddVB dst (LoadVector mem))); 11312 effect(TEMP src); 11313 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 11314 ins_encode %{ 11315 int vector_len = 0; 11316 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11317 %} 11318 ins_pipe( pipe_slow ); 11319 %} 11320 11321 instruct vadd8B(vecD dst, vecD src) %{ 11322 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 11323 match(Set dst (AddVB dst src)); 11324 format %{ "paddb $dst,$src\t! add packed8B" %} 11325 ins_encode %{ 11326 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 11327 %} 11328 ins_pipe( pipe_slow ); 11329 %} 11330 11331 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 11332 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 11333 match(Set dst (AddVB src1 src2)); 11334 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 11335 ins_encode %{ 11336 int vector_len = 0; 11337 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11338 %} 11339 ins_pipe( pipe_slow ); 11340 %} 11341 11342 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 11343 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 11344 match(Set dst (AddVB src1 src2)); 11345 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %} 11346 ins_encode %{ 11347 int vector_len = 0; 11348 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11349 %} 11350 ins_pipe( pipe_slow ); 11351 %} 11352 11353 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 11354 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 11355 match(Set dst (AddVB dst src2)); 11356 effect(TEMP src1); 11357 format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} 11358 ins_encode %{ 11359 int vector_len = 0; 11360 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11361 %} 11362 ins_pipe( pipe_slow ); 11363 %} 11364 11365 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ 11366 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 11367 match(Set dst (AddVB src (LoadVector mem))); 11368 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 11369 ins_encode %{ 11370 int vector_len = 0; 11371 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11372 %} 11373 ins_pipe( pipe_slow ); 11374 %} 11375 11376 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ 11377 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 11378 match(Set dst (AddVB src (LoadVector mem))); 11379 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 11380 ins_encode %{ 11381 int vector_len = 0; 11382 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11383 %} 11384 ins_pipe( pipe_slow ); 11385 %} 11386 11387 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 11388 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 11389 match(Set dst (AddVB dst (LoadVector mem))); 11390 effect(TEMP src); 11391 format %{ "vpaddb $dst,$src,$mem\t! 
add packed8B" %} 11392 ins_encode %{ 11393 int vector_len = 0; 11394 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11395 %} 11396 ins_pipe( pipe_slow ); 11397 %} 11398 11399 instruct vadd16B(vecX dst, vecX src) %{ 11400 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 11401 match(Set dst (AddVB dst src)); 11402 format %{ "paddb $dst,$src\t! add packed16B" %} 11403 ins_encode %{ 11404 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 11405 %} 11406 ins_pipe( pipe_slow ); 11407 %} 11408 11409 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 11410 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 11411 match(Set dst (AddVB src1 src2)); 11412 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 11413 ins_encode %{ 11414 int vector_len = 0; 11415 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11416 %} 11417 ins_pipe( pipe_slow ); 11418 %} 11419 11420 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 11421 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 11422 match(Set dst (AddVB src1 src2)); 11423 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 11424 ins_encode %{ 11425 int vector_len = 0; 11426 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11427 %} 11428 ins_pipe( pipe_slow ); 11429 %} 11430 11431 instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 11432 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 11433 match(Set dst (AddVB dst src2)); 11434 effect(TEMP src1); 11435 format %{ "vpaddb $dst,$dst,$src2\t! 
add packed16B" %} 11436 ins_encode %{ 11437 int vector_len = 0; 11438 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 11439 %} 11440 ins_pipe( pipe_slow ); 11441 %} 11442 11443 instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{ 11444 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 11445 match(Set dst (AddVB src (LoadVector mem))); 11446 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 11447 ins_encode %{ 11448 int vector_len = 0; 11449 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11450 %} 11451 ins_pipe( pipe_slow ); 11452 %} 11453 11454 instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{ 11455 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 11456 match(Set dst (AddVB src (LoadVector mem))); 11457 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 11458 ins_encode %{ 11459 int vector_len = 0; 11460 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11461 %} 11462 ins_pipe( pipe_slow ); 11463 %} 11464 11465 instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{ 11466 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 11467 match(Set dst (AddVB dst (LoadVector mem))); 11468 effect(TEMP src); 11469 format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %} 11470 ins_encode %{ 11471 int vector_len = 0; 11472 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 11473 %} 11474 ins_pipe( pipe_slow ); 11475 %} 11476 11477 instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{ 11478 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 11479 match(Set dst (AddVB src1 src2)); 11480 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit byte add on AVX-512BW targets (byte/word ops need the BW
// extension for the EVEX encoding of vpaddb).
// vector_len selects the operation width: 0 = 128-bit, 1 = 256-bit, 2 = 512-bit.
instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// "Special" variant for AVX-512 chips without BW: matched in two-address
// form (dst is both input and output).
// NOTE(review): the encoding reads $src1, which is only an effect(TEMP)
// operand while the matched rule computes dst + src2 — presumably the
// allocator is expected to alias src1 with dst; confirm against ADLC
// TEMP semantics. The same pattern repeats in every *_special rule below.
instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit byte add: only available with AVX-512BW.
instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
// Dword ops need no BW extension, so plain UseAVX-level predicates suffice.
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit float add needs only AVX1 (vaddps is a VEX float op),
// unlike the integer forms above which need AVX2 (UseAVX > 1).
instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Renamed from "vsub4B_reg_exex_special": every sibling rule in this file
// uses the "_evex_special" suffix for the supports_avx512nobw() variant.
instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t!
sub packed32B" %} 12516 ins_encode %{ 12517 int vector_len = 1; 12518 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12519 %} 12520 ins_pipe( pipe_slow ); 12521 %} 12522 12523 instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 12524 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 12525 match(Set dst (SubVB dst src2)); 12526 effect(TEMP src1); 12527 format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %} 12528 ins_encode %{ 12529 int vector_len = 1; 12530 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12531 %} 12532 ins_pipe( pipe_slow ); 12533 %} 12534 12535 instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{ 12536 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32); 12537 match(Set dst (SubVB src (LoadVector mem))); 12538 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 12539 ins_encode %{ 12540 int vector_len = 1; 12541 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12542 %} 12543 ins_pipe( pipe_slow ); 12544 %} 12545 12546 instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{ 12547 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 12548 match(Set dst (SubVB src (LoadVector mem))); 12549 format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %} 12550 ins_encode %{ 12551 int vector_len = 1; 12552 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12553 %} 12554 ins_pipe( pipe_slow ); 12555 %} 12556 12557 instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{ 12558 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32); 12559 match(Set dst (SubVB dst (LoadVector mem))); 12560 effect(TEMP src); 12561 format %{ "vpsubb $dst,$src,$mem\t! 
sub packed32B" %} 12562 ins_encode %{ 12563 int vector_len = 1; 12564 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12565 %} 12566 ins_pipe( pipe_slow ); 12567 %} 12568 12569 instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 12570 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 12571 match(Set dst (SubVB src1 src2)); 12572 format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %} 12573 ins_encode %{ 12574 int vector_len = 2; 12575 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 12576 %} 12577 ins_pipe( pipe_slow ); 12578 %} 12579 12580 instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{ 12581 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64); 12582 match(Set dst (SubVB src (LoadVector mem))); 12583 format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %} 12584 ins_encode %{ 12585 int vector_len = 2; 12586 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 12587 %} 12588 ins_pipe( pipe_slow ); 12589 %} 12590 12591 // Shorts/Chars vector sub 12592 instruct vsub2S(vecS dst, vecS src) %{ 12593 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 12594 match(Set dst (SubVS dst src)); 12595 format %{ "psubw $dst,$src\t! sub packed2S" %} 12596 ins_encode %{ 12597 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 12598 %} 12599 ins_pipe( pipe_slow ); 12600 %} 12601 12602 instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 12603 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 12604 match(Set dst (SubVS src1 src2)); 12605 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---- Short/char-vector subtract (SubVS), continued ----
// Variant naming as in the byte section: _reg_avx = AVX1/2 only,
// _reg_evex = AVX-512 with BW, _reg_evex_special = AVX-512 without BW
// (read-modify-write match with a TEMP register), _mem = folded memory operand.
instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4 shorts (64-bit vector, vecD).
instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts (128-bit vector, vecX).
instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 16 shorts (256-bit vector, vecY): vector_len = 1.
instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 32 shorts (512-bit vector, vecZ): vector_len = 2; requires AVX512BW.
instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
// Int forms need no BW extension, so there are no _evex/_evex_special splits:
// UseAVX > 0/1/2 gates 128/256/512-bit widths directly.
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t!
sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ---- Int-vector subtract (SubVI), continued ----
instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  // AVX three-operand form, 128-bit encoding (vector_len = 0).
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (256-bit, vecY): needs AVX2 (UseAVX > 1), vector_len = 1.
instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 16 ints (512-bit, vecZ): needs AVX-512 (UseAVX > 2), vector_len = 2.
instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (256-bit, vecY): needs AVX2, vector_len = 1.
instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8 longs (512-bit, vecZ): needs AVX-512, vector_len = 2.
instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t!
sub packed2F" %} 13128 ins_encode %{ 13129 int vector_len = 0; 13130 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13131 %} 13132 ins_pipe( pipe_slow ); 13133 %} 13134 13135 instruct vsub4F(vecX dst, vecX src) %{ 13136 predicate(n->as_Vector()->length() == 4); 13137 match(Set dst (SubVF dst src)); 13138 format %{ "subps $dst,$src\t! sub packed4F" %} 13139 ins_encode %{ 13140 __ subps($dst$$XMMRegister, $src$$XMMRegister); 13141 %} 13142 ins_pipe( pipe_slow ); 13143 %} 13144 13145 instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{ 13146 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13147 match(Set dst (SubVF src1 src2)); 13148 format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %} 13149 ins_encode %{ 13150 int vector_len = 0; 13151 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13152 %} 13153 ins_pipe( pipe_slow ); 13154 %} 13155 13156 instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{ 13157 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13158 match(Set dst (SubVF src (LoadVector mem))); 13159 format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %} 13160 ins_encode %{ 13161 int vector_len = 0; 13162 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13163 %} 13164 ins_pipe( pipe_slow ); 13165 %} 13166 13167 instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{ 13168 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 13169 match(Set dst (SubVF src1 src2)); 13170 format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %} 13171 ins_encode %{ 13172 int vector_len = 1; 13173 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13174 %} 13175 ins_pipe( pipe_slow ); 13176 %} 13177 13178 instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{ 13179 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 13180 match(Set dst (SubVF src (LoadVector mem))); 13181 format %{ "vsubps $dst,$src,$mem\t! 
sub packed8F" %} 13182 ins_encode %{ 13183 int vector_len = 1; 13184 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13185 %} 13186 ins_pipe( pipe_slow ); 13187 %} 13188 13189 instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 13190 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 13191 match(Set dst (SubVF src1 src2)); 13192 format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %} 13193 ins_encode %{ 13194 int vector_len = 2; 13195 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13196 %} 13197 ins_pipe( pipe_slow ); 13198 %} 13199 13200 instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{ 13201 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 13202 match(Set dst (SubVF src (LoadVector mem))); 13203 format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %} 13204 ins_encode %{ 13205 int vector_len = 2; 13206 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13207 %} 13208 ins_pipe( pipe_slow ); 13209 %} 13210 13211 // Doubles vector sub 13212 instruct vsub2D(vecX dst, vecX src) %{ 13213 predicate(n->as_Vector()->length() == 2); 13214 match(Set dst (SubVD dst src)); 13215 format %{ "subpd $dst,$src\t! sub packed2D" %} 13216 ins_encode %{ 13217 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 13218 %} 13219 ins_pipe( pipe_slow ); 13220 %} 13221 13222 instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{ 13223 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13224 match(Set dst (SubVD src1 src2)); 13225 format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %} 13226 ins_encode %{ 13227 int vector_len = 0; 13228 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13229 %} 13230 ins_pipe( pipe_slow ); 13231 %} 13232 13233 instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{ 13234 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13235 match(Set dst (SubVD src (LoadVector mem))); 13236 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed2D" %} 13237 ins_encode %{ 13238 int vector_len = 0; 13239 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13240 %} 13241 ins_pipe( pipe_slow ); 13242 %} 13243 13244 instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{ 13245 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13246 match(Set dst (SubVD src1 src2)); 13247 format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %} 13248 ins_encode %{ 13249 int vector_len = 1; 13250 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13251 %} 13252 ins_pipe( pipe_slow ); 13253 %} 13254 13255 instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{ 13256 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13257 match(Set dst (SubVD src (LoadVector mem))); 13258 format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %} 13259 ins_encode %{ 13260 int vector_len = 1; 13261 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13262 %} 13263 ins_pipe( pipe_slow ); 13264 %} 13265 13266 instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 13267 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 13268 match(Set dst (SubVD src1 src2)); 13269 format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %} 13270 ins_encode %{ 13271 int vector_len = 2; 13272 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13273 %} 13274 ins_pipe( pipe_slow ); 13275 %} 13276 13277 instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{ 13278 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 13279 match(Set dst (SubVD src (LoadVector mem))); 13280 format %{ "vsubpd $dst,$src,$mem\t! 
sub packed8D" %} 13281 ins_encode %{ 13282 int vector_len = 2; 13283 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13284 %} 13285 ins_pipe( pipe_slow ); 13286 %} 13287 13288 // --------------------------------- MUL -------------------------------------- 13289 13290 // Byte vector mul 13291 13292 instruct mul4B_reg(vecS dst, vecS src1, vecS src2, vecS tmp2, vecS tmp) %{ 13293 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 13294 match(Set dst (MulVB src1 src2)); 13295 effect(TEMP dst, TEMP tmp2, TEMP tmp); 13296 format %{"pmovsxbw $tmp,$src1\n\t" 13297 "pmovsxbw $tmp2,$src2\n\t" 13298 "pmullw $tmp,$tmp2\n\t" 13299 "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" 13300 "pand $tmp,$tmp2\n\t" 13301 "packuswb $tmp,$tmp\n\t" 13302 "movss $dst,$tmp\t! mul packed4B" %} 13303 ins_encode %{ 13304 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 13305 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 13306 __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); 13307 __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13308 __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister); 13309 __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister); 13310 __ movss($dst$$XMMRegister, $tmp$$XMMRegister); 13311 %} 13312 ins_pipe( pipe_slow ); 13313 %} 13314 13315 instruct mul8B_reg(vecD dst, vecD src1, vecD src2, vecD tmp2, vecD tmp) %{ 13316 predicate(UseSSE > 3 && n->as_Vector()->length() == 8); 13317 match(Set dst (MulVB src1 src2)); 13318 effect(TEMP dst, TEMP tmp2, TEMP tmp); 13319 format %{"pmovsxbw $tmp,$src1\n\t" 13320 "pmovsxbw $tmp2,$src2\n\t" 13321 "pmullw $tmp,$tmp2\n\t" 13322 "movdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" 13323 "pand $tmp,$tmp2\n\t" 13324 "packuswb $tmp,$tmp\n\t" 13325 "movsd $dst,$tmp\t! 
mul packed8B" %}
  ins_encode %{
    // Widen both byte operands to 16-bit lanes, multiply with pmullw,
    // mask each word back to its low byte and pack 8 results into $dst.
    __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()));
    __ pand($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ packuswb($tmp$$XMMRegister, $tmp$$XMMRegister);
    __ movsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16-byte (128-bit) vector byte multiply for SSE4.1+ (pmovsxbw requires SSE4.1,
// hence UseSSE > 3). There is no byte-lane multiply in SSE, so each half of the
// vector is sign-extended to words (high half moved down via pshufd), multiplied
// with pmullw, masked back to bytes with 0x00ff00ff..., and re-packed.
// Fixed: final format line read "movdqu $dst,$tmp \n\t!" — stray space and a
// newline before the "!" trailer, inconsistent with every sibling instruct.
instruct mul16B_reg(vecX dst, vecX src1, vecX src2, vecX tmp3, vecX tmp2, vecX tmp) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 16);
  match(Set dst (MulVB src1 src2));
  effect(TEMP tmp3, TEMP tmp2, TEMP tmp);
  format %{"pmovsxbw $tmp,$src1\n\t"
           "pmovsxbw $tmp2,$src2\n\t"
           "pmullw $tmp,$tmp2\n\t"
           "pshufd $tmp2,$src1\n\t"
           "pshufd $tmp3,$src2\n\t"
           "pmovsxbw $tmp2,$tmp2\n\t"
           "pmovsxbw $tmp3,$tmp3\n\t"
           "pmullw $tmp2,$tmp3\n\t"
           "movdqu $tmp3,[0x00ff00ff0x00ff00ff]\n\t"
           "pand $tmp,$tmp3\n\t"
           "pand $tmp2,$tmp3\n\t"
           "packuswb $tmp,$tmp2\n\t"
           "movdqu $dst,$tmp\t!
mul packed16B" %} 13355 ins_encode %{ 13356 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 13357 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 13358 __ pmullw($tmp$$XMMRegister, $tmp2$$XMMRegister); 13359 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 238); 13360 __ pshufd($tmp3$$XMMRegister, $src2$$XMMRegister, 238); 13361 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 13362 __ pmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister); 13363 __ pmullw($tmp2$$XMMRegister, $tmp3$$XMMRegister); 13364 __ movdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13365 __ pand($tmp$$XMMRegister, $tmp3$$XMMRegister); 13366 __ pand($tmp2$$XMMRegister, $tmp3$$XMMRegister); 13367 __ packuswb($tmp$$XMMRegister, $tmp2$$XMMRegister); 13368 __ movdqu($dst$$XMMRegister, $tmp$$XMMRegister); 13369 %} 13370 ins_pipe( pipe_slow ); 13371 %} 13372 13373 instruct vmul16B_reg_avx(vecX dst, vecX src1, vecX src2, vecY tmp2, vecY tmp) %{ 13374 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 13375 match(Set dst (MulVB src1 src2)); 13376 effect(TEMP dst, TEMP tmp2, TEMP tmp); 13377 format %{"vpmovsxbw $tmp,$src1\n\t" 13378 "vpmovsxbw $tmp2,$src2\n\t" 13379 "vpmullw $tmp,$tmp2\n\t" 13380 "vmovdqu $tmp2,[0x00ff00ff0x00ff00ff]\n\t" 13381 "vpand $tmp,$tmp2\n\t" 13382 "vextracti128_high $tmp2,$tmp\n\t" 13383 "vpackuswb $dst,$tmp, $tmp2\n\t! 
mul packed16B" %} 13384 ins_encode %{ 13385 int vector_len = 1; 13386 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len); 13387 __ vpmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister, vector_len); 13388 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 13389 __ vmovdqu($tmp2$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13390 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 13391 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 13392 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 13393 %} 13394 ins_pipe( pipe_slow ); 13395 %} 13396 13397 instruct vmul32B_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp1, vecY tmp2, vecY tmp3) %{ 13398 predicate(UseAVX > 1 && n->as_Vector()->length() == 32); 13399 match(Set dst (MulVB src1 src2)); 13400 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3); 13401 format %{"vextracti128_high $tmp1,$src1\n\t" 13402 "vextracti128_high $tmp3,$src2\n\t" 13403 "vpmovsxbw $tmp1,$tmp1\n\t" 13404 "vpmovsxbw $tmp3,$tmp3\n\t" 13405 "vpmullw $tmp1,$tmp1,$tmp3\n\t" 13406 "vpmovsxbw $tmp2,$src1\n\t" 13407 "vpmovsxbw $tmp3,$src2\n\t" 13408 "vpmullw $tmp2,$tmp2,$tmp3\n\t" 13409 "vmovdqu $tmp3, [0x00ff00ff0x00ff00ff]\n\t" 13410 "vpbroadcastd $tmp3, $tmp3\n\t" 13411 "vpand $tmp2,$tmp2,$tmp3\n\t" 13412 "vpand $tmp1,$tmp1,$tmp3\n\t" 13413 "vpackuswb $dst,$tmp2,$tmp1\n\t" 13414 "vpermq $dst, $dst, 0xD8\t! 
mul packed32B" %} 13415 ins_encode %{ 13416 int vector_len = 1; 13417 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 13418 __ vextracti128_high($tmp3$$XMMRegister, $src2$$XMMRegister); 13419 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13420 __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13421 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13422 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 13423 __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); 13424 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13425 __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13426 __ vpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister); 13427 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13428 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13429 __ vpackuswb($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13430 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len); 13431 %} 13432 ins_pipe( pipe_slow ); 13433 %} 13434 13435 instruct vmul64B_reg_avx(vecZ dst, vecZ src1, vecZ src2, vecZ tmp1, vecZ tmp2, vecZ tmp3, vecZ tmp4, vecZ tmp5, vecZ tmp6) %{ 13436 predicate(UseAVX > 2 && n->as_Vector()->length() == 64); 13437 match(Set dst (MulVB src1 src2)); 13438 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4); 13439 format %{"vextracti64x4_high $tmp1,$src1\n\t" 13440 "vextracti64x4_high $tmp3,$src2\n\t" 13441 "vpmovsxbw $tmp1,$tmp1\n\t" 13442 "vpmovsxbw $tmp3,$tmp3\n\t" 13443 "vpmullw $tmp1,$tmp1,$tmp3\n\t" 13444 "vpmovsxbw $tmp2,$src1\n\t" 13445 "vpmovsxbw $tmp3,$src2\n\t" 13446 "vpmullw $tmp2,$tmp2,$tmp3\n\t" 13447 "vmovdqu $tmp3, [0x00ff00ff0x00ff00ff]\n\t" 13448 "evpbroadcastd $tmp3, $tmp3\n\t" 13449 "evpandd $tmp1,$tmp1,$tmp3\n\t" 13450 "evpandd $tmp2,$tmp2,$tmp3\n\t" 13451 "vpackuswb 
$tmp1,$tmp2,$tmp1\n\t" 13452 "vextracti64x4_high $tmp3,$tmp1\n\t" 13453 "vpermq $tmp3, $tmp3, 0x8D\n\t" 13454 "vpermq $tmp1, $tmp1, 0xD8\n\t" 13455 "vmovdqu $tmp4,$tmp3\n\t" 13456 "vmovdqu $tmp2,$tmp1\n\t" 13457 "vpblendd $tmp3,$tmp3,$tmp1\n\t" 13458 "vpblendd $tmp2,$tmp2,$tmp4\n\t" 13459 "vpermq $tmp2,$tmp2,0x4E\n\t" 13460 "vinserti64x4 $dst,$dst,$tmp3,0x00\n\t" 13461 "vinserti64x4 $dst,$dst,$tmp2,0x01\t! mul packed64B" %} 13462 ins_encode %{ 13463 int vector_len = 2; 13464 KRegister ktmp = k1; 13465 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 13466 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 13467 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13468 __ vpmovsxbw($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13469 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13470 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len); 13471 __ vpmovsxbw($tmp3$$XMMRegister, $src2$$XMMRegister, vector_len); 13472 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13473 __ vmovdqu($tmp3$$XMMRegister, ExternalAddress(vector_short_to_byte_mask())); 13474 __ evpbroadcastd($tmp3$$XMMRegister, $tmp3$$XMMRegister, vector_len); 13475 __ evpandd($tmp1$$XMMRegister, ktmp, $tmp1$$XMMRegister, $tmp3$$XMMRegister, false, vector_len); 13476 __ evpandd($tmp2$$XMMRegister, ktmp, $tmp2$$XMMRegister, $tmp3$$XMMRegister, false, vector_len); 13477 __ vpackuswb($tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13478 __ vextracti64x4_high($tmp3$$XMMRegister, $tmp1$$XMMRegister); 13479 __ vpermq($tmp3$$XMMRegister, $tmp3$$XMMRegister, 0x8D, 1); 13480 __ vpermq($tmp1$$XMMRegister, $tmp1$$XMMRegister, 0xD8, 1); 13481 __ vmovdqu($tmp4$$XMMRegister, $tmp3$$XMMRegister); 13482 __ vmovdqu($tmp2$$XMMRegister, $tmp1$$XMMRegister); 13483 __ vpblendd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $tmp1$$XMMRegister, 0x0F, 1); 13484 __ 
vpblendd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp4$$XMMRegister, 0x0F, 1); 13485 __ vpermq($tmp2$$XMMRegister, $tmp2$$XMMRegister, 0x4E, 1); 13486 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp3$$XMMRegister, 0x00); 13487 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, 0x01); 13488 %} 13489 ins_pipe( pipe_slow ); 13490 %} 13491 13492 // Shorts/Chars vector mul 13493 instruct vmul2S(vecS dst, vecS src) %{ 13494 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 13495 match(Set dst (MulVS dst src)); 13496 format %{ "pmullw $dst,$src\t! mul packed2S" %} 13497 ins_encode %{ 13498 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 13499 %} 13500 ins_pipe( pipe_slow ); 13501 %} 13502 13503 instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{ 13504 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 13505 match(Set dst (MulVS src1 src2)); 13506 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 13507 ins_encode %{ 13508 int vector_len = 0; 13509 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13510 %} 13511 ins_pipe( pipe_slow ); 13512 %} 13513 13514 instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{ 13515 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 13516 match(Set dst (MulVS src1 src2)); 13517 format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %} 13518 ins_encode %{ 13519 int vector_len = 0; 13520 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13521 %} 13522 ins_pipe( pipe_slow ); 13523 %} 13524 13525 instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{ 13526 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 13527 match(Set dst (MulVS dst src2)); 13528 effect(TEMP src1); 13529 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed2S" %} 13530 ins_encode %{ 13531 int vector_len = 0; 13532 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13533 %} 13534 ins_pipe( pipe_slow ); 13535 %} 13536 13537 instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{ 13538 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 13539 match(Set dst (MulVS src (LoadVector mem))); 13540 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 13541 ins_encode %{ 13542 int vector_len = 0; 13543 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13544 %} 13545 ins_pipe( pipe_slow ); 13546 %} 13547 13548 instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{ 13549 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 13550 match(Set dst (MulVS src (LoadVector mem))); 13551 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 13552 ins_encode %{ 13553 int vector_len = 0; 13554 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13555 %} 13556 ins_pipe( pipe_slow ); 13557 %} 13558 13559 instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{ 13560 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 13561 match(Set dst (MulVS dst (LoadVector mem))); 13562 effect(TEMP src); 13563 format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} 13564 ins_encode %{ 13565 int vector_len = 0; 13566 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13567 %} 13568 ins_pipe( pipe_slow ); 13569 %} 13570 13571 instruct vmul4S(vecD dst, vecD src) %{ 13572 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 13573 match(Set dst (MulVS dst src)); 13574 format %{ "pmullw $dst,$src\t! 
mul packed4S" %} 13575 ins_encode %{ 13576 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 13577 %} 13578 ins_pipe( pipe_slow ); 13579 %} 13580 13581 instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 13582 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 13583 match(Set dst (MulVS src1 src2)); 13584 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 13585 ins_encode %{ 13586 int vector_len = 0; 13587 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13588 %} 13589 ins_pipe( pipe_slow ); 13590 %} 13591 13592 instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{ 13593 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 13594 match(Set dst (MulVS src1 src2)); 13595 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 13596 ins_encode %{ 13597 int vector_len = 0; 13598 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13599 %} 13600 ins_pipe( pipe_slow ); 13601 %} 13602 13603 instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 13604 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 13605 match(Set dst (MulVS dst src2)); 13606 effect(TEMP src1); 13607 format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %} 13608 ins_encode %{ 13609 int vector_len = 0; 13610 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13611 %} 13612 ins_pipe( pipe_slow ); 13613 %} 13614 13615 instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{ 13616 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 13617 match(Set dst (MulVS src (LoadVector mem))); 13618 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed4S" %} 13619 ins_encode %{ 13620 int vector_len = 0; 13621 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13622 %} 13623 ins_pipe( pipe_slow ); 13624 %} 13625 13626 instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{ 13627 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 13628 match(Set dst (MulVS src (LoadVector mem))); 13629 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 13630 ins_encode %{ 13631 int vector_len = 0; 13632 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13633 %} 13634 ins_pipe( pipe_slow ); 13635 %} 13636 13637 instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{ 13638 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 13639 match(Set dst (MulVS dst (LoadVector mem))); 13640 effect(TEMP src); 13641 format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} 13642 ins_encode %{ 13643 int vector_len = 0; 13644 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13645 %} 13646 ins_pipe( pipe_slow ); 13647 %} 13648 13649 instruct vmul8S(vecX dst, vecX src) %{ 13650 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 13651 match(Set dst (MulVS dst src)); 13652 format %{ "pmullw $dst,$src\t! mul packed8S" %} 13653 ins_encode %{ 13654 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 13655 %} 13656 ins_pipe( pipe_slow ); 13657 %} 13658 13659 instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 13660 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 13661 match(Set dst (MulVS src1 src2)); 13662 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed8S" %} 13663 ins_encode %{ 13664 int vector_len = 0; 13665 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13666 %} 13667 ins_pipe( pipe_slow ); 13668 %} 13669 13670 instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{ 13671 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 13672 match(Set dst (MulVS src1 src2)); 13673 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 13674 ins_encode %{ 13675 int vector_len = 0; 13676 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13677 %} 13678 ins_pipe( pipe_slow ); 13679 %} 13680 13681 instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{ 13682 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 13683 match(Set dst (MulVS dst src2)); 13684 effect(TEMP src1); 13685 format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %} 13686 ins_encode %{ 13687 int vector_len = 0; 13688 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13689 %} 13690 ins_pipe( pipe_slow ); 13691 %} 13692 13693 instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{ 13694 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 13695 match(Set dst (MulVS src (LoadVector mem))); 13696 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 13697 ins_encode %{ 13698 int vector_len = 0; 13699 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13700 %} 13701 ins_pipe( pipe_slow ); 13702 %} 13703 13704 instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{ 13705 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 13706 match(Set dst (MulVS src (LoadVector mem))); 13707 format %{ "vpmullw $dst,$src,$mem\t! 
mul packed8S" %} 13708 ins_encode %{ 13709 int vector_len = 0; 13710 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13711 %} 13712 ins_pipe( pipe_slow ); 13713 %} 13714 13715 instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{ 13716 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 13717 match(Set dst (MulVS dst (LoadVector mem))); 13718 effect(TEMP src); 13719 format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %} 13720 ins_encode %{ 13721 int vector_len = 0; 13722 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13723 %} 13724 ins_pipe( pipe_slow ); 13725 %} 13726 13727 instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{ 13728 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 13729 match(Set dst (MulVS src1 src2)); 13730 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 13731 ins_encode %{ 13732 int vector_len = 1; 13733 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13734 %} 13735 ins_pipe( pipe_slow ); 13736 %} 13737 13738 instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{ 13739 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 13740 match(Set dst (MulVS src1 src2)); 13741 format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %} 13742 ins_encode %{ 13743 int vector_len = 1; 13744 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13745 %} 13746 ins_pipe( pipe_slow ); 13747 %} 13748 13749 instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{ 13750 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 13751 match(Set dst (MulVS dst src2)); 13752 effect(TEMP src1); 13753 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed16S" %} 13754 ins_encode %{ 13755 int vector_len = 1; 13756 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13757 %} 13758 ins_pipe( pipe_slow ); 13759 %} 13760 13761 instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{ 13762 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 13763 match(Set dst (MulVS src (LoadVector mem))); 13764 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 13765 ins_encode %{ 13766 int vector_len = 1; 13767 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13768 %} 13769 ins_pipe( pipe_slow ); 13770 %} 13771 13772 instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{ 13773 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 13774 match(Set dst (MulVS src (LoadVector mem))); 13775 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 13776 ins_encode %{ 13777 int vector_len = 1; 13778 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13779 %} 13780 ins_pipe( pipe_slow ); 13781 %} 13782 13783 instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{ 13784 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 13785 match(Set dst (MulVS dst (LoadVector mem))); 13786 effect(TEMP src); 13787 format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %} 13788 ins_encode %{ 13789 int vector_len = 1; 13790 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13791 %} 13792 ins_pipe( pipe_slow ); 13793 %} 13794 13795 instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{ 13796 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 13797 match(Set dst (MulVS src1 src2)); 13798 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packed32S" %} 13799 ins_encode %{ 13800 int vector_len = 2; 13801 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13802 %} 13803 ins_pipe( pipe_slow ); 13804 %} 13805 13806 instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{ 13807 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 13808 match(Set dst (MulVS src (LoadVector mem))); 13809 format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %} 13810 ins_encode %{ 13811 int vector_len = 2; 13812 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13813 %} 13814 ins_pipe( pipe_slow ); 13815 %} 13816 13817 // Integers vector mul (sse4_1) 13818 instruct vmul2I(vecD dst, vecD src) %{ 13819 predicate(UseSSE > 3 && n->as_Vector()->length() == 2); 13820 match(Set dst (MulVI dst src)); 13821 format %{ "pmulld $dst,$src\t! mul packed2I" %} 13822 ins_encode %{ 13823 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 13824 %} 13825 ins_pipe( pipe_slow ); 13826 %} 13827 13828 instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ 13829 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13830 match(Set dst (MulVI src1 src2)); 13831 format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %} 13832 ins_encode %{ 13833 int vector_len = 0; 13834 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13835 %} 13836 ins_pipe( pipe_slow ); 13837 %} 13838 13839 instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ 13840 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 13841 match(Set dst (MulVI src (LoadVector mem))); 13842 format %{ "vpmulld $dst,$src,$mem\t! 
mul packed2I" %} 13843 ins_encode %{ 13844 int vector_len = 0; 13845 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13846 %} 13847 ins_pipe( pipe_slow ); 13848 %} 13849 13850 instruct vmul4I(vecX dst, vecX src) %{ 13851 predicate(UseSSE > 3 && n->as_Vector()->length() == 4); 13852 match(Set dst (MulVI dst src)); 13853 format %{ "pmulld $dst,$src\t! mul packed4I" %} 13854 ins_encode %{ 13855 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 13856 %} 13857 ins_pipe( pipe_slow ); 13858 %} 13859 13860 instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ 13861 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13862 match(Set dst (MulVI src1 src2)); 13863 format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} 13864 ins_encode %{ 13865 int vector_len = 0; 13866 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13867 %} 13868 ins_pipe( pipe_slow ); 13869 %} 13870 13871 instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ 13872 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 13873 match(Set dst (MulVI src (LoadVector mem))); 13874 format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} 13875 ins_encode %{ 13876 int vector_len = 0; 13877 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13878 %} 13879 ins_pipe( pipe_slow ); 13880 %} 13881 13882 // Long vector mul 13883 13884 instruct mul2L_reg(vecX dst, vecX src2, vecX tmp) %{ 13885 predicate(UseSSE > 3 && n->as_Vector()->length() == 2 && VM_Version::supports_sse4_1()); 13886 match(Set dst (MulVL dst src2)); 13887 effect(TEMP dst, TEMP tmp); 13888 format %{ "pshufd $tmp,$src2, 177\n\t" 13889 "pmulld $tmp,$dst\n\t" 13890 "phaddd $tmp,$tmp\n\t" 13891 "pmovzxdq $tmp,$tmp\n\t" 13892 "psllq $tmp, 32\n\t" 13893 "pmuludq $dst,$src2\n\t" 13894 "paddq $dst,$tmp\n\t! 
mul packed2L" %} 13895 13896 ins_encode %{ 13897 int vector_len = 0; 13898 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); 13899 __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); 13900 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 13901 __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); 13902 __ psllq($tmp$$XMMRegister, 32); 13903 __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); 13904 __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); 13905 %} 13906 ins_pipe( pipe_slow ); 13907 %} 13908 13909 instruct vmul2L_reg_avx(vecX dst, vecX src1, vecX src2, vecX tmp1, vecX tmp) %{ 13910 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && VM_Version::supports_avx()); 13911 match(Set dst (MulVL src1 src2)); 13912 effect(TEMP tmp1, TEMP tmp); 13913 format %{ "vpshufd $tmp,$src2\n\t" 13914 "vpmulld $tmp,$src1,$tmp\n\t" 13915 "vphaddd $tmp,$tmp,$tmp\n\t" 13916 "vpmovzxdq $tmp,$tmp\n\t" 13917 "vpsllq $tmp,$tmp\n\t" 13918 "vpmuludq $tmp1,$src1,$src2\n\t" 13919 "vpaddq $dst,$tmp,$tmp1\t! mul packed2L" %} 13920 ins_encode %{ 13921 int vector_len = 0; 13922 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len); 13923 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len); 13924 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 13925 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 13926 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len); 13927 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13928 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13929 %} 13930 ins_pipe( pipe_slow ); 13931 %} 13932 13933 instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ 13934 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); 13935 match(Set dst (MulVL src1 src2)); 13936 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 2-element long vector multiply with a memory operand; uses the native
// 64x64->64 vpmullq, so it requires AVX-512DQ.
instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4-element long vector multiply for AVX2, which has no native 64x64->64
// multiply: the product is synthesized from 32-bit multiplies
// (vpmulld for the cross terms, vpmuludq for the low halves) plus shift/add.
// Fixed: removed the stray trailing comma after "vecY tmp1" in the operand
// list — a syntax error for the ADLC parser.
// NOTE(review): the format string omits the vextracti128_high step that
// ins_encode emits between vpmulld and vphaddd; cosmetic only — confirm
// against the encode block before tidying further.
instruct vmul4L_reg_avx(vecY dst, vecY src1, vecY src2, vecY tmp, vecY tmp1) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && VM_Version::supports_avx2());
  match(Set dst (MulVL src1 src2));
  effect(TEMP tmp1, TEMP tmp);
  format %{ "vpshufd $tmp,$src2\n\t"
            "vpmulld $tmp,$src1,$tmp\n\t"
            "vphaddd $tmp,$tmp,$tmp\n\t"
            "vpmovzxdq $tmp,$tmp\n\t"
            "vpsllq $tmp,$tmp\n\t"
            "vpmuludq $tmp1,$src1,$src2\n\t"
            "vpaddq $dst,$tmp,$tmp1\t!
mul packed4L" %} 13966 ins_encode %{ 13967 int vector_len = 1; 13968 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vector_len); 13969 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vector_len); 13970 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 13971 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13972 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 13973 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vector_len); 13974 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13975 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vector_len); 13976 %} 13977 ins_pipe( pipe_slow ); 13978 %} 13979 13980 instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{ 13981 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 13982 match(Set dst (MulVL src1 src2)); 13983 format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %} 13984 ins_encode %{ 13985 int vector_len = 1; 13986 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 13987 %} 13988 ins_pipe( pipe_slow ); 13989 %} 13990 13991 instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{ 13992 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq()); 13993 match(Set dst (MulVL src (LoadVector mem))); 13994 format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %} 13995 ins_encode %{ 13996 int vector_len = 1; 13997 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 13998 %} 13999 ins_pipe( pipe_slow ); 14000 %} 14001 14002 instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ 14003 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 14004 match(Set dst (MulVL src1 src2)); 14005 format %{ "vpmullq $dst,$src1,$src2\t! 
mul packed8L" %} 14006 ins_encode %{ 14007 int vector_len = 2; 14008 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14009 %} 14010 ins_pipe( pipe_slow ); 14011 %} 14012 14013 instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ 14014 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); 14015 match(Set dst (MulVL src (LoadVector mem))); 14016 format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} 14017 ins_encode %{ 14018 int vector_len = 2; 14019 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14020 %} 14021 ins_pipe( pipe_slow ); 14022 %} 14023 14024 instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ 14025 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 14026 match(Set dst (MulVI src1 src2)); 14027 format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %} 14028 ins_encode %{ 14029 int vector_len = 1; 14030 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14031 %} 14032 ins_pipe( pipe_slow ); 14033 %} 14034 14035 instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ 14036 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 14037 match(Set dst (MulVI src (LoadVector mem))); 14038 format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} 14039 ins_encode %{ 14040 int vector_len = 1; 14041 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14042 %} 14043 ins_pipe( pipe_slow ); 14044 %} 14045 14046 instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{ 14047 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 14048 match(Set dst (MulVI src1 src2)); 14049 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packed16I" %} 14050 ins_encode %{ 14051 int vector_len = 2; 14052 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14053 %} 14054 ins_pipe( pipe_slow ); 14055 %} 14056 14057 instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ 14058 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 14059 match(Set dst (MulVI src (LoadVector mem))); 14060 format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %} 14061 ins_encode %{ 14062 int vector_len = 2; 14063 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14064 %} 14065 ins_pipe( pipe_slow ); 14066 %} 14067 14068 // Floats vector mul 14069 instruct vmul2F(vecD dst, vecD src) %{ 14070 predicate(n->as_Vector()->length() == 2); 14071 match(Set dst (MulVF dst src)); 14072 format %{ "mulps $dst,$src\t! mul packed2F" %} 14073 ins_encode %{ 14074 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 14075 %} 14076 ins_pipe( pipe_slow ); 14077 %} 14078 14079 instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{ 14080 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 14081 match(Set dst (MulVF src1 src2)); 14082 format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %} 14083 ins_encode %{ 14084 int vector_len = 0; 14085 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14086 %} 14087 ins_pipe( pipe_slow ); 14088 %} 14089 14090 instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ 14091 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 14092 match(Set dst (MulVF src (LoadVector mem))); 14093 format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} 14094 ins_encode %{ 14095 int vector_len = 0; 14096 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14097 %} 14098 ins_pipe( pipe_slow ); 14099 %} 14100 14101 instruct vmul4F(vecX dst, vecX src) %{ 14102 predicate(n->as_Vector()->length() == 4); 14103 match(Set dst (MulVF dst src)); 14104 format %{ "mulps $dst,$src\t! 
mul packed4F" %} 14105 ins_encode %{ 14106 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 14107 %} 14108 ins_pipe( pipe_slow ); 14109 %} 14110 14111 instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{ 14112 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14113 match(Set dst (MulVF src1 src2)); 14114 format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %} 14115 ins_encode %{ 14116 int vector_len = 0; 14117 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14118 %} 14119 ins_pipe( pipe_slow ); 14120 %} 14121 14122 instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{ 14123 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14124 match(Set dst (MulVF src (LoadVector mem))); 14125 format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %} 14126 ins_encode %{ 14127 int vector_len = 0; 14128 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14129 %} 14130 ins_pipe( pipe_slow ); 14131 %} 14132 14133 instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{ 14134 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 14135 match(Set dst (MulVF src1 src2)); 14136 format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %} 14137 ins_encode %{ 14138 int vector_len = 1; 14139 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14140 %} 14141 ins_pipe( pipe_slow ); 14142 %} 14143 14144 instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{ 14145 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 14146 match(Set dst (MulVF src (LoadVector mem))); 14147 format %{ "vmulps $dst,$src,$mem\t! 
mul packed8F" %} 14148 ins_encode %{ 14149 int vector_len = 1; 14150 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14151 %} 14152 ins_pipe( pipe_slow ); 14153 %} 14154 14155 instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 14156 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 14157 match(Set dst (MulVF src1 src2)); 14158 format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %} 14159 ins_encode %{ 14160 int vector_len = 2; 14161 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14162 %} 14163 ins_pipe( pipe_slow ); 14164 %} 14165 14166 instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{ 14167 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 14168 match(Set dst (MulVF src (LoadVector mem))); 14169 format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %} 14170 ins_encode %{ 14171 int vector_len = 2; 14172 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14173 %} 14174 ins_pipe( pipe_slow ); 14175 %} 14176 14177 // Doubles vector mul 14178 instruct vmul2D(vecX dst, vecX src) %{ 14179 predicate(n->as_Vector()->length() == 2); 14180 match(Set dst (MulVD dst src)); 14181 format %{ "mulpd $dst,$src\t! mul packed2D" %} 14182 ins_encode %{ 14183 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 14184 %} 14185 ins_pipe( pipe_slow ); 14186 %} 14187 14188 instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{ 14189 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 14190 match(Set dst (MulVD src1 src2)); 14191 format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %} 14192 ins_encode %{ 14193 int vector_len = 0; 14194 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14195 %} 14196 ins_pipe( pipe_slow ); 14197 %} 14198 14199 instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{ 14200 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 14201 match(Set dst (MulVD src (LoadVector mem))); 14202 format %{ "vmulpd $dst,$src,$mem\t! 
mul packed2D" %} 14203 ins_encode %{ 14204 int vector_len = 0; 14205 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14206 %} 14207 ins_pipe( pipe_slow ); 14208 %} 14209 14210 instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{ 14211 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14212 match(Set dst (MulVD src1 src2)); 14213 format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %} 14214 ins_encode %{ 14215 int vector_len = 1; 14216 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14217 %} 14218 ins_pipe( pipe_slow ); 14219 %} 14220 14221 instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{ 14222 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14223 match(Set dst (MulVD src (LoadVector mem))); 14224 format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %} 14225 ins_encode %{ 14226 int vector_len = 1; 14227 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14228 %} 14229 ins_pipe( pipe_slow ); 14230 %} 14231 14232 instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 14233 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 14234 match(Set dst (MulVD src1 src2)); 14235 format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %} 14236 ins_encode %{ 14237 int vector_len = 2; 14238 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14239 %} 14240 ins_pipe( pipe_slow ); 14241 %} 14242 14243 instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{ 14244 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 14245 match(Set dst (MulVD src (LoadVector mem))); 14246 format %{ "vmulpd $dst k0,$src,$mem\t! 
mul packed8D" %} 14247 ins_encode %{ 14248 int vector_len = 2; 14249 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14250 %} 14251 ins_pipe( pipe_slow ); 14252 %} 14253 14254 instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 14255 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8); 14256 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 14257 effect(TEMP dst, USE src1, USE src2); 14258 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 14259 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 14260 %} 14261 ins_encode %{ 14262 int vector_len = 1; 14263 int cond = (Assembler::Condition)($copnd$$cmpcode); 14264 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 14265 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 14266 %} 14267 ins_pipe( pipe_slow ); 14268 %} 14269 14270 instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{ 14271 predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4); 14272 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 14273 effect(TEMP dst, USE src1, USE src2); 14274 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 14275 "vblendvpd $dst,$src1,$src2,$dst ! 
vcmovevd\n\t" 14276 %} 14277 ins_encode %{ 14278 int vector_len = 1; 14279 int cond = (Assembler::Condition)($copnd$$cmpcode); 14280 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); 14281 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 14282 %} 14283 ins_pipe( pipe_slow ); 14284 %} 14285 14286 // --------------------------------- DIV -------------------------------------- 14287 14288 // Floats vector div 14289 instruct vdiv2F(vecD dst, vecD src) %{ 14290 predicate(n->as_Vector()->length() == 2); 14291 match(Set dst (DivVF dst src)); 14292 format %{ "divps $dst,$src\t! div packed2F" %} 14293 ins_encode %{ 14294 __ divps($dst$$XMMRegister, $src$$XMMRegister); 14295 %} 14296 ins_pipe( pipe_slow ); 14297 %} 14298 14299 instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{ 14300 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 14301 match(Set dst (DivVF src1 src2)); 14302 format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %} 14303 ins_encode %{ 14304 int vector_len = 0; 14305 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14306 %} 14307 ins_pipe( pipe_slow ); 14308 %} 14309 14310 instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ 14311 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 14312 match(Set dst (DivVF src (LoadVector mem))); 14313 format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} 14314 ins_encode %{ 14315 int vector_len = 0; 14316 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14317 %} 14318 ins_pipe( pipe_slow ); 14319 %} 14320 14321 instruct vdiv4F(vecX dst, vecX src) %{ 14322 predicate(n->as_Vector()->length() == 4); 14323 match(Set dst (DivVF dst src)); 14324 format %{ "divps $dst,$src\t! 
div packed4F" %} 14325 ins_encode %{ 14326 __ divps($dst$$XMMRegister, $src$$XMMRegister); 14327 %} 14328 ins_pipe( pipe_slow ); 14329 %} 14330 14331 instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{ 14332 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14333 match(Set dst (DivVF src1 src2)); 14334 format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %} 14335 ins_encode %{ 14336 int vector_len = 0; 14337 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14338 %} 14339 ins_pipe( pipe_slow ); 14340 %} 14341 14342 instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{ 14343 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14344 match(Set dst (DivVF src (LoadVector mem))); 14345 format %{ "vdivps $dst,$src,$mem\t! div packed4F" %} 14346 ins_encode %{ 14347 int vector_len = 0; 14348 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14349 %} 14350 ins_pipe( pipe_slow ); 14351 %} 14352 14353 instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{ 14354 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 14355 match(Set dst (DivVF src1 src2)); 14356 format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %} 14357 ins_encode %{ 14358 int vector_len = 1; 14359 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14360 %} 14361 ins_pipe( pipe_slow ); 14362 %} 14363 14364 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 14365 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 14366 match(Set dst (DivVF src (LoadVector mem))); 14367 format %{ "vdivps $dst,$src,$mem\t! 
div packed8F" %} 14368 ins_encode %{ 14369 int vector_len = 1; 14370 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14371 %} 14372 ins_pipe( pipe_slow ); 14373 %} 14374 14375 instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{ 14376 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 14377 match(Set dst (DivVF src1 src2)); 14378 format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %} 14379 ins_encode %{ 14380 int vector_len = 2; 14381 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14382 %} 14383 ins_pipe( pipe_slow ); 14384 %} 14385 14386 instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{ 14387 predicate(UseAVX > 0 && n->as_Vector()->length() == 16); 14388 match(Set dst (DivVF src (LoadVector mem))); 14389 format %{ "vdivps $dst,$src,$mem\t! div packed16F" %} 14390 ins_encode %{ 14391 int vector_len = 2; 14392 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14393 %} 14394 ins_pipe( pipe_slow ); 14395 %} 14396 14397 // Doubles vector div 14398 instruct vdiv2D(vecX dst, vecX src) %{ 14399 predicate(n->as_Vector()->length() == 2); 14400 match(Set dst (DivVD dst src)); 14401 format %{ "divpd $dst,$src\t! div packed2D" %} 14402 ins_encode %{ 14403 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 14404 %} 14405 ins_pipe( pipe_slow ); 14406 %} 14407 14408 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 14409 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 14410 match(Set dst (DivVD src1 src2)); 14411 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 14412 ins_encode %{ 14413 int vector_len = 0; 14414 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14415 %} 14416 ins_pipe( pipe_slow ); 14417 %} 14418 14419 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 14420 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 14421 match(Set dst (DivVD src (LoadVector mem))); 14422 format %{ "vdivpd $dst,$src,$mem\t! 
div packed2D" %} 14423 ins_encode %{ 14424 int vector_len = 0; 14425 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14426 %} 14427 ins_pipe( pipe_slow ); 14428 %} 14429 14430 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 14431 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14432 match(Set dst (DivVD src1 src2)); 14433 format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %} 14434 ins_encode %{ 14435 int vector_len = 1; 14436 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14437 %} 14438 ins_pipe( pipe_slow ); 14439 %} 14440 14441 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 14442 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 14443 match(Set dst (DivVD src (LoadVector mem))); 14444 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 14445 ins_encode %{ 14446 int vector_len = 1; 14447 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14448 %} 14449 ins_pipe( pipe_slow ); 14450 %} 14451 14452 instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{ 14453 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 14454 match(Set dst (DivVD src1 src2)); 14455 format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %} 14456 ins_encode %{ 14457 int vector_len = 2; 14458 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14459 %} 14460 ins_pipe( pipe_slow ); 14461 %} 14462 14463 instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{ 14464 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 14465 match(Set dst (DivVD src (LoadVector mem))); 14466 format %{ "vdivpd $dst,$src,$mem\t! 
div packed8D" %} 14467 ins_encode %{ 14468 int vector_len = 2; 14469 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 14470 %} 14471 ins_pipe( pipe_slow ); 14472 %} 14473 14474 // ------------------------------ Min --------------------------------------- 14475 // Byte vector Min 14476 instruct min8B_reg(vecD dst, vecD src1, vecD src2) %{ 14477 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14478 match(Set dst (MinV src1 src2)); 14479 effect(TEMP dst); 14480 format %{ "movdqu $dst,$src1\n\t" 14481 "pminsb $dst,$src2\t! " %} 14482 ins_encode %{ 14483 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14484 __ pminsb($dst$$XMMRegister, $src2$$XMMRegister); 14485 %} 14486 ins_pipe( pipe_slow ); 14487 %} 14488 14489 instruct min8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 14490 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14491 match(Set dst (MinV src1 src2)); 14492 format %{ "vpminsb $dst,$src1,$src2\t! " %} 14493 ins_encode %{ 14494 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 14495 %} 14496 ins_pipe( pipe_slow ); 14497 %} 14498 14499 instruct min16B_reg(vecX dst, vecX src1, vecX src2) %{ 14500 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14501 match(Set dst (MinV src1 src2)); 14502 effect(TEMP dst); 14503 format %{ "movdqu $dst,$src1\n\t" 14504 "pminsb $dst,$src2\t! 
" %} 14505 ins_encode %{ 14506 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14507 __ pminsb($dst$$XMMRegister, $src2$$XMMRegister); 14508 %} 14509 ins_pipe( pipe_slow ); 14510 %} 14511 14512 instruct min16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 14513 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14514 match(Set dst (MinV src1 src2)); 14515 format %{ "vpminsb $dst,$src1,$src2\t! " %} 14516 ins_encode %{ 14517 int vector_len = 0; 14518 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14519 %} 14520 ins_pipe( pipe_slow ); 14521 %} 14522 14523 instruct min32B_reg(vecY dst, vecY src1, vecY src2) %{ 14524 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14525 match(Set dst (MinV src1 src2)); 14526 format %{ "vpminsb $dst,$src1,$src2\t! " %} 14527 ins_encode %{ 14528 int vector_len = 1; 14529 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14530 %} 14531 ins_pipe( pipe_slow ); 14532 %} 14533 14534 instruct min64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 14535 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 14536 match(Set dst (MinV src1 src2)); 14537 format %{ "vpminsb $dst,$src1,$src2\t! " %} 14538 ins_encode %{ 14539 int vector_len = 2; 14540 __ vpminsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14541 %} 14542 ins_pipe( pipe_slow ); 14543 %} 14544 14545 //Short vector Min 14546 instruct min4S_reg(vecD dst, vecD src1, vecD src2) %{ 14547 predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14548 match(Set dst (MinV src1 src2)); 14549 effect(TEMP dst); 14550 format %{ "movsd $dst,$src1\n\t" 14551 "pminsw $dst,$src2\t! 
" %} 14552 ins_encode %{ 14553 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 14554 __ pminsw($dst$$XMMRegister, $src2$$XMMRegister); 14555 %} 14556 ins_pipe( pipe_slow ); 14557 %} 14558 14559 instruct min4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 14560 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14561 match(Set dst (MinV src1 src2)); 14562 effect(TEMP dst); 14563 format %{ "vpminsw $dst,$src1,$src2\t! " %} 14564 ins_encode %{ 14565 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 14566 %} 14567 ins_pipe( pipe_slow ); 14568 %} 14569 14570 instruct min8S_reg(vecX dst, vecX src1, vecX src2) %{ 14571 predicate(UseSSE >= 2 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14572 match(Set dst (MinV src1 src2)); 14573 effect(TEMP dst); 14574 format %{ "movdqu $dst,$src1\n\t" 14575 "pminsw $dst,$src2\t! " %} 14576 ins_encode %{ 14577 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14578 __ pminsw($dst$$XMMRegister, $src2$$XMMRegister); 14579 %} 14580 ins_pipe( pipe_slow ); 14581 %} 14582 14583 instruct min8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 14584 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14585 match(Set dst (MinV src1 src2)); 14586 format %{ "vpminsw $dst,$src1,$src2\t! " %} 14587 ins_encode %{ 14588 int vector_len = 0; 14589 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14590 %} 14591 ins_pipe( pipe_slow ); 14592 %} 14593 14594 instruct min16S_reg(vecY dst, vecY src1, vecY src2) %{ 14595 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14596 match(Set dst (MinV src1 src2)); 14597 format %{ "vpminsw $dst,$src1,$src2\t! 
" %} 14598 ins_encode %{ 14599 int vector_len = 1; 14600 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14601 %} 14602 ins_pipe( pipe_slow ); 14603 %} 14604 14605 instruct min32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 14606 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 14607 match(Set dst (MinV src1 src2)); 14608 format %{ "vpminsw $dst,$src1,$src2\t! " %} 14609 ins_encode %{ 14610 int vector_len = 2; 14611 __ vpminsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14612 %} 14613 ins_pipe( pipe_slow ); 14614 %} 14615 14616 // Int vector Min 14617 instruct min2I_reg(vecD dst, vecD src1, vecD src2) %{ 14618 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14619 match(Set dst (MinV src1 src2)); 14620 effect(TEMP dst); 14621 format %{ "movsd $dst,$src1\n\t" 14622 "pminsd $dst,$src2\t! " %} 14623 ins_encode %{ 14624 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 14625 __ pminsd($dst$$XMMRegister, $src2$$XMMRegister); 14626 %} 14627 ins_pipe( pipe_slow ); 14628 %} 14629 14630 instruct min2I_reg_avx(vecD dst, vecD src1, vecD src2) %{ 14631 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14632 match(Set dst (MinV src1 src2)); 14633 format %{ "vpminsd $dst,$src1,$src2\t! 
" %} 14634 ins_encode %{ 14635 int vector_len = 0; 14636 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14637 %} 14638 ins_pipe( pipe_slow ); 14639 %} 14640 14641 instruct min4I_reg(vecX dst, vecX src1, vecX src2) %{ 14642 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14643 match(Set dst (MinV src1 src2)); 14644 effect(TEMP dst); 14645 format %{ "movdqu $dst,$src1\n\t" 14646 "pminsd $dst,$src2\t! " %} 14647 ins_encode %{ 14648 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14649 __ pminsd($dst$$XMMRegister, $src2$$XMMRegister); 14650 %} 14651 ins_pipe( pipe_slow ); 14652 %} 14653 14654 instruct min4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ 14655 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14656 match(Set dst (MinV src1 src2)); 14657 format %{ "vpminsd $dst,$src1,$src2\t! " %} 14658 ins_encode %{ 14659 int vector_len = 0; 14660 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14661 %} 14662 ins_pipe( pipe_slow ); 14663 %} 14664 14665 instruct min4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ 14666 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14667 match(Set dst (MinV src1 src2)); 14668 format %{ "vpminsd $dst,$src1,$src2\t! " %} 14669 ins_encode %{ 14670 int vector_len = 0; 14671 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14672 %} 14673 ins_pipe( pipe_slow ); 14674 %} 14675 14676 instruct min8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ 14677 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14678 match(Set dst (MinV src1 src2)); 14679 format %{ "vpminsd $dst,$src1,$src2\t! 
" %} 14680 ins_encode %{ 14681 int vector_len = 1; 14682 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14683 %} 14684 ins_pipe( pipe_slow ); 14685 %} 14686 14687 instruct min8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ 14688 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14689 match(Set dst (MinV src1 src2)); 14690 format %{ "vpminsd $dst,$src1,$src2\t! " %} 14691 ins_encode %{ 14692 int vector_len = 1; 14693 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14694 %} 14695 ins_pipe( pipe_slow ); 14696 %} 14697 14698 instruct min16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 14699 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 14700 match(Set dst (MinV src1 src2)); 14701 format %{ "vpminsd $dst,$src1,$src2\t! " %} 14702 ins_encode %{ 14703 int vector_len = 2; 14704 __ vpminsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14705 %} 14706 ins_pipe( pipe_slow ); 14707 %} 14708 14709 // Long vector Min 14710 instruct minL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ 14711 predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14712 match(Set dst (MinV src1 src2)); 14713 effect(TEMP dst, TEMP tmp); 14714 format %{ "movsd $tmp,$src1\n\t" 14715 "movsd $dst,$src1\n\t" 14716 "pcmpgtq $tmp,$src2\n\t" 14717 "blendvpd $dst,$src2\t! 
" %} 14718 ins_encode %{ 14719 __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); 14720 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 14721 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 14722 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 14723 %} 14724 ins_pipe( pipe_slow ); 14725 %} 14726 14727 instruct min1L_reg_avx(vecD dst, vecD src1, vecD src2) %{ 14728 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14729 match(Set dst (MinV src1 src2)); 14730 effect(TEMP dst); 14731 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 14732 "vblendvpd $dst,$src1,$src2,$dst\t! " %} 14733 ins_encode %{ 14734 int vector_len = 0; 14735 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14736 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 14737 %} 14738 ins_pipe( pipe_slow ); 14739 %} 14740 14741 instruct min2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ 14742 predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14743 match(Set dst (MinV src1 src2)); 14744 effect(TEMP dst, TEMP tmp); 14745 format %{ "movdqu $tmp,$src1\n\t" 14746 "movdqu $dst,$src1\n\t" 14747 "pcmpgtq $tmp,$src2\n\t" 14748 "blendvpd $dst,$src2\t! 
" %} 14749 ins_encode %{ 14750 __ movdqu($tmp$$XMMRegister, $src1$$XMMRegister); 14751 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14752 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 14753 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 14754 %} 14755 ins_pipe( pipe_slow ); 14756 %} 14757 14758 instruct min2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ 14759 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14760 match(Set dst (MinV src1 src2)); 14761 effect(TEMP dst); 14762 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 14763 "vblendvpd $dst,$src1,$src2,$dst\t! " %} 14764 ins_encode %{ 14765 int vector_len = 0; 14766 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14767 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 14768 %} 14769 ins_pipe( pipe_slow ); 14770 %} 14771 14772 instruct min4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ 14773 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14774 match(Set dst (MinV src1 src2)); 14775 effect(TEMP dst); 14776 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 14777 "vblendvpd $dst,$src1,$src2,$dst\t! " %} 14778 ins_encode %{ 14779 int vector_len = 1; 14780 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14781 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); 14782 %} 14783 ins_pipe( pipe_slow ); 14784 %} 14785 14786 instruct min2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ 14787 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14788 match(Set dst (MinV src1 src2)); 14789 format %{ "vpminsq $dst,$src1,src2\t! 
" %} 14790 ins_encode %{ 14791 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 14792 %} 14793 ins_pipe( pipe_slow ); 14794 %} 14795 14796 instruct min4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ 14797 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14798 match(Set dst (MinV src1 src2)); 14799 format %{ "vpminsq $dst,$src1,src2\t! " %} 14800 ins_encode %{ 14801 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 14802 %} 14803 ins_pipe( pipe_slow ); 14804 %} 14805 14806 instruct min8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 14807 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 14808 match(Set dst (MinV src1 src2)); 14809 format %{ "vpminsq $dst,$src1,src2\t! " %} 14810 ins_encode %{ 14811 __ vpminsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2); 14812 %} 14813 ins_pipe( pipe_slow ); 14814 %} 14815 14816 // Float vector Min 14817 instruct min2F_reg(vecD dst, vecD src1, vecD src2) %{ 14818 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14819 match(Set dst (MinV src1 src2)); 14820 effect(TEMP dst); 14821 format %{ "movsd $dst,$src1\n\t" 14822 "minps $dst,$src2\t! " %} 14823 ins_encode %{ 14824 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 14825 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 14826 %} 14827 ins_pipe( pipe_slow ); 14828 %} 14829 14830 instruct min2F_reg_avx(vecD dst, vecD src1, vecD src2) %{ 14831 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14832 match(Set dst (MinV src1 src2)); 14833 format %{ "vminps $dst,$src1,$src2\t! 
" %} 14834 ins_encode %{ 14835 int vector_len = 0; 14836 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14837 %} 14838 ins_pipe( pipe_slow ); 14839 %} 14840 14841 instruct min4F_reg(vecX dst, vecX src1, vecX src2) %{ 14842 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14843 match(Set dst (MinV src1 src2)); 14844 effect(TEMP dst); 14845 format %{ "movdqu $dst,$src1\n\t" 14846 "minps $dst,$src2\t! " %} 14847 ins_encode %{ 14848 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14849 __ minps($dst$$XMMRegister, $src2$$XMMRegister); 14850 %} 14851 ins_pipe( pipe_slow ); 14852 %} 14853 14854 instruct min4F_reg_avx(vecX dst, vecX src1, vecX src2) %{ 14855 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14856 match(Set dst (MinV src1 src2)); 14857 format %{ "vminps $dst,$src1,$src2\t! " %} 14858 ins_encode %{ 14859 int vector_len = 0; 14860 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14861 %} 14862 ins_pipe( pipe_slow ); 14863 %} 14864 14865 instruct min4F_reg_evex(vecX dst, vecX src1, vecX src2) %{ 14866 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14867 match(Set dst (MinV src1 src2)); 14868 format %{ "vminps $dst,$src1,$src2\t! " %} 14869 ins_encode %{ 14870 int vector_len = 0; 14871 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14872 %} 14873 ins_pipe( pipe_slow ); 14874 %} 14875 14876 instruct min8F_reg_avx(vecY dst, vecY src1, vecY src2) %{ 14877 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14878 match(Set dst (MinV src1 src2)); 14879 format %{ "vminps $dst,$src1,$src2\t! 
" %} 14880 ins_encode %{ 14881 int vector_len = 1; 14882 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14883 %} 14884 ins_pipe( pipe_slow ); 14885 %} 14886 14887 instruct min8F_reg_evex(vecY dst, vecY src1, vecY src2) %{ 14888 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14889 match(Set dst (MinV src1 src2)); 14890 format %{ "vminps $dst,$src1,$src2\t! " %} 14891 ins_encode %{ 14892 int vector_len = 1; 14893 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14894 %} 14895 ins_pipe( pipe_slow ); 14896 %} 14897 14898 instruct min16F_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 14899 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 14900 match(Set dst (MinV src1 src2)); 14901 format %{ "vminps $dst,$src1,$src2\t! " %} 14902 ins_encode %{ 14903 int vector_len = 2; 14904 __ vminps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14905 %} 14906 ins_pipe( pipe_slow ); 14907 %} 14908 14909 // Double vector Min 14910 instruct minD_reg(vecD dst, vecD src1, vecD src2) %{ 14911 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14912 match(Set dst (MinV src1 src2)); 14913 effect(TEMP dst); 14914 format %{ "movsd $dst,$src1\n\t" 14915 "minpd $dst,$src2\t! 
" %} 14916 ins_encode %{ 14917 int vector_len = 0; 14918 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 14919 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 14920 %} 14921 ins_pipe( pipe_slow ); 14922 %} 14923 14924 instruct min1D_reg_avx(vecD dst, vecD src1, vecD src2) %{ 14925 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14926 match(Set dst (MinV src1 src2)); 14927 format %{ "vminpd $dst,$src1,$src2\t! " %} 14928 ins_encode %{ 14929 int vector_len = 0; 14930 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14931 %} 14932 ins_pipe( pipe_slow ); 14933 %} 14934 14935 instruct min2D_reg(vecX dst, vecX src1, vecX src2) %{ 14936 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14937 match(Set dst (MinV src1 src2)); 14938 effect(TEMP dst); 14939 format %{ "movdqu $dst,$src1\n\t" 14940 "minpd $dst,$src2\t! " %} 14941 ins_encode %{ 14942 int vector_len = 0; 14943 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 14944 __ minpd($dst$$XMMRegister, $src2$$XMMRegister); 14945 %} 14946 ins_pipe( pipe_slow ); 14947 %} 14948 14949 instruct min2D_reg_avx(vecX dst, vecX src1, vecX src2) %{ 14950 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14951 match(Set dst (MinV src1 src2)); 14952 format %{ "vminpd $dst,$src1,$src2\t! 
" %} 14953 ins_encode %{ 14954 int vector_len = 0; 14955 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14956 %} 14957 ins_pipe( pipe_slow ); 14958 %} 14959 14960 instruct min2D_reg_evex(vecX dst, vecX src1, vecX src2) %{ 14961 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14962 match(Set dst (MinV src1 src2)); 14963 format %{ "vminpd $dst,$src1,$src2\t! " %} 14964 ins_encode %{ 14965 int vector_len = 0; 14966 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14967 %} 14968 ins_pipe( pipe_slow ); 14969 %} 14970 14971 instruct min4D_reg_avx(vecY dst, vecY src1, vecY src2) %{ 14972 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14973 match(Set dst (MinV src1 src2)); 14974 format %{ "vminpd $dst,$src1,$src2\t! " %} 14975 ins_encode %{ 14976 int vector_len = 1; 14977 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14978 %} 14979 ins_pipe( pipe_slow ); 14980 %} 14981 14982 instruct min4D_reg_evex(vecY dst, vecY src1, vecY src2) %{ 14983 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14984 match(Set dst (MinV src1 src2)); 14985 format %{ "vminpd $dst,$src1,$src2\t! " %} 14986 ins_encode %{ 14987 int vector_len = 1; 14988 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 14989 %} 14990 ins_pipe( pipe_slow ); 14991 %} 14992 14993 instruct min8D_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 14994 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 14995 match(Set dst (MinV src1 src2)); 14996 format %{ "vminpd $dst,$src1,$src2\t! 
" %} 14997 ins_encode %{ 14998 int vector_len = 2; 14999 __ vminpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15000 %} 15001 ins_pipe( pipe_slow ); 15002 %} 15003 15004 // ------------------------------ Max --------------------------------------- 15005 // Byte vector Max 15006 instruct max8B_reg(vecD dst, vecD src1, vecD src2) %{ 15007 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15008 match(Set dst (MaxV src1 src2)); 15009 effect(TEMP dst); 15010 format %{ "movsd $dst,$src1\n\t" 15011 "pmaxsb $dst,$src2\t! " %} 15012 ins_encode %{ 15013 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 15014 __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); 15015 %} 15016 ins_pipe( pipe_slow ); 15017 %} 15018 15019 instruct max8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15020 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15021 match(Set dst (MaxV src1 src2)); 15022 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 15023 ins_encode %{ 15024 int vector_len = 0; 15025 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15026 %} 15027 ins_pipe( pipe_slow ); 15028 %} 15029 15030 instruct max16B_reg(vecX dst, vecX src1, vecX src2) %{ 15031 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15032 match(Set dst (MaxV src1 src2)); 15033 effect(TEMP dst); 15034 format %{ "movdqu $dst,$src1\n\t" 15035 "pmaxsb $dst,$src2\t! 
" %} 15036 ins_encode %{ 15037 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15038 __ pmaxsb($dst$$XMMRegister, $src2$$XMMRegister); 15039 %} 15040 ins_pipe( pipe_slow ); 15041 %} 15042 15043 instruct max16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15044 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15045 match(Set dst (MaxV src1 src2)); 15046 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 15047 ins_encode %{ 15048 int vector_len = 0; 15049 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15050 %} 15051 ins_pipe( pipe_slow ); 15052 %} 15053 15054 instruct max32B_reg(vecY dst, vecY src1, vecY src2) %{ 15055 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15056 match(Set dst (MaxV src1 src2)); 15057 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 15058 ins_encode %{ 15059 int vector_len = 1; 15060 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15061 %} 15062 ins_pipe( pipe_slow ); 15063 %} 15064 15065 instruct max64B_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15066 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 15067 match(Set dst (MaxV src1 src2)); 15068 format %{ "vpmaxsb $dst,$src1,$src2\t! " %} 15069 ins_encode %{ 15070 int vector_len = 2; 15071 __ vpmaxsb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15072 %} 15073 ins_pipe( pipe_slow ); 15074 %} 15075 15076 //Short vector Max 15077 instruct max4S_reg(vecD dst, vecD src1, vecD src2) %{ 15078 predicate(UseSSE > 1 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15079 match(Set dst (MaxV src1 src2)); 15080 effect(TEMP dst); 15081 format %{ "movsd $dst,$src1\n\t" 15082 "pmaxsw $dst,$src2\t! 
" %} 15083 ins_encode %{ 15084 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 15085 __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); 15086 %} 15087 ins_pipe( pipe_slow ); 15088 %} 15089 15090 instruct max4S_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15091 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15092 match(Set dst (MaxV src1 src2)); 15093 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 15094 ins_encode %{ 15095 int vector_len = 0; 15096 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15097 %} 15098 ins_pipe( pipe_slow ); 15099 %} 15100 15101 instruct max8S_reg(vecX dst, vecX src1, vecX src2) %{ 15102 predicate(UseSSE > 1 && UseAVX == 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15103 match(Set dst (MaxV src1 src2)); 15104 effect(TEMP dst); 15105 format %{ "movdqu $dst,$src1\n\t" 15106 "pmaxsw $dst,$src2\t! " %} 15107 ins_encode %{ 15108 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15109 __ pmaxsw($dst$$XMMRegister, $src2$$XMMRegister); 15110 %} 15111 ins_pipe( pipe_slow ); 15112 %} 15113 15114 instruct max8S_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15115 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15116 match(Set dst (MaxV src1 src2)); 15117 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 15118 ins_encode %{ 15119 int vector_len = 0; 15120 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15121 %} 15122 ins_pipe( pipe_slow ); 15123 %} 15124 15125 instruct max16S_reg(vecY dst, vecY src1, vecY src2) %{ 15126 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15127 match(Set dst (MaxV src1 src2)); 15128 format %{ "vpmaxsw $dst,$src1,$src2\t! 
" %} 15129 ins_encode %{ 15130 int vector_len = 1; 15131 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15132 %} 15133 ins_pipe( pipe_slow ); 15134 %} 15135 15136 instruct max32S_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15137 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 15138 match(Set dst (MaxV src1 src2)); 15139 format %{ "vpmaxsw $dst,$src1,$src2\t! " %} 15140 ins_encode %{ 15141 int vector_len = 2; 15142 __ vpmaxsw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15143 %} 15144 ins_pipe( pipe_slow ); 15145 %} 15146 15147 // Int vector Max 15148 instruct max2I_reg(vecD dst, vecD src1, vecD src2) %{ 15149 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15150 match(Set dst (MaxV src1 src2)); 15151 effect(TEMP dst); 15152 format %{ "movdqu $dst,$src1\n\t" 15153 "pmaxsd $dst,$src2\t! " %} 15154 ins_encode %{ 15155 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15156 __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); 15157 %} 15158 ins_pipe( pipe_slow ); 15159 %} 15160 15161 instruct max2I_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15162 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15163 match(Set dst (MaxV src1 src2)); 15164 format %{ "vpmaxsd $dst,$src1,$src2\t! 
" %} 15165 ins_encode %{ 15166 int vector_len = 0; 15167 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15168 %} 15169 ins_pipe( pipe_slow ); 15170 %} 15171 15172 instruct max4I_reg(vecX dst, vecX src1, vecX src2) %{ 15173 predicate(UseSSE > 3 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15174 match(Set dst (MaxV src1 src2)); 15175 effect(TEMP dst); 15176 format %{ "movdqu $dst,$src1\n\t" 15177 "pmaxsd $dst,$src2\t! " %} 15178 ins_encode %{ 15179 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15180 __ pmaxsd($dst$$XMMRegister, $src2$$XMMRegister); 15181 %} 15182 ins_pipe( pipe_slow ); 15183 %} 15184 15185 instruct max4I_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15186 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15187 match(Set dst (MaxV src1 src2)); 15188 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 15189 ins_encode %{ 15190 int vector_len = 0; 15191 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15192 %} 15193 ins_pipe( pipe_slow ); 15194 %} 15195 15196 instruct max4I_reg_evex(vecX dst, vecX src1, vecX src2) %{ 15197 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15198 match(Set dst (MaxV src1 src2)); 15199 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 15200 ins_encode %{ 15201 int vector_len = 0; 15202 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15203 %} 15204 ins_pipe( pipe_slow ); 15205 %} 15206 15207 instruct max8I_reg_avx(vecY dst, vecY src1, vecY src2) %{ 15208 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15209 match(Set dst (MaxV src1 src2)); 15210 format %{ "vpmaxsd $dst,$src1,$src2\t! 
" %} 15211 ins_encode %{ 15212 int vector_len = 1; 15213 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15214 %} 15215 ins_pipe( pipe_slow ); 15216 %} 15217 15218 instruct max8I_reg_evex(vecY dst, vecY src1, vecY src2) %{ 15219 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15220 match(Set dst (MaxV src1 src2)); 15221 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 15222 ins_encode %{ 15223 int vector_len = 1; 15224 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15225 %} 15226 ins_pipe( pipe_slow ); 15227 %} 15228 15229 instruct max16I_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15230 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 15231 match(Set dst (MaxV src1 src2)); 15232 format %{ "vpmaxsd $dst,$src1,$src2\t! " %} 15233 ins_encode %{ 15234 int vector_len = 2; 15235 __ vpmaxsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15236 %} 15237 ins_pipe( pipe_slow ); 15238 %} 15239 15240 // Long Vector Max 15241 instruct maxL_reg(vecD dst, vecD src1, vecD src2, rxmm0 tmp) %{ 15242 predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15243 match(Set dst (MaxV src1 src2)); 15244 effect(TEMP dst, TEMP tmp); 15245 format %{ "movsd $tmp,$src1\n\t" 15246 "movsd $dst,$src1\n\t" 15247 "pcmpgtq $tmp,$src2\n\t" 15248 "blendvpd $dst,$src2\t! 
" %} 15249 ins_encode %{ 15250 __ movsd($tmp$$XMMRegister, $src1$$XMMRegister); 15251 __ movsd($dst$$XMMRegister, $src2$$XMMRegister); 15252 __ pcmpgtq($tmp$$XMMRegister, $src2$$XMMRegister); 15253 __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister); 15254 %} 15255 ins_pipe( pipe_slow ); 15256 %} 15257 15258 instruct max1L_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15259 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15260 match(Set dst (MaxV src1 src2)); 15261 effect(TEMP dst); 15262 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 15263 "vblendvpd $dst,$src2,$src1,$dst\t! " %} 15264 ins_encode %{ 15265 int vector_len = 0; 15266 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15267 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 15268 %} 15269 ins_pipe( pipe_slow ); 15270 %} 15271 15272 instruct max2L_reg(vecX dst, vecX src1, vecX src2, rxmm0 tmp) %{ 15273 predicate(UseAVX == 0 && UseSSE >= 4 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15274 match(Set dst (MaxV src1 src2)); 15275 effect(TEMP dst, TEMP tmp); 15276 format %{ "movdqu $tmp,$src2\n\t" 15277 "movdqu $dst,$src1\n\t" 15278 "pcmpgtq $tmp,$src1\n\t" 15279 "blendvpd $dst,$src2\t! 
" %} 15280 ins_encode %{ 15281 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 15282 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15283 __ pcmpgtq($tmp$$XMMRegister, $src1$$XMMRegister); 15284 __ blendvpd($dst$$XMMRegister, $src2$$XMMRegister); 15285 %} 15286 ins_pipe( pipe_slow ); 15287 %} 15288 15289 instruct max2L_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15290 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15291 match(Set dst (MaxV src1 src2)); 15292 effect(TEMP dst); 15293 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 15294 "vblendvpd $dst,$src2,$src1,$dst\t! " %} 15295 ins_encode %{ 15296 int vector_len = 0; 15297 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15298 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 15299 %} 15300 ins_pipe( pipe_slow ); 15301 %} 15302 15303 instruct max2L_reg_evex(vecX dst, vecX src1, vecX src2) %{ 15304 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15305 match(Set dst (MaxV src1 src2)); 15306 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 15307 ins_encode %{ 15308 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 0); 15309 %} 15310 ins_pipe( pipe_slow ); 15311 %} 15312 15313 instruct max4L_reg_avx(vecY dst, vecY src1, vecY src2) %{ 15314 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15315 match(Set dst (MaxV src1 src2)); 15316 effect(TEMP dst); 15317 format %{ "vpcmpgtq $dst,$src1,$src2\n\t" 15318 "vblendvpd $dst,$src2,$src1,$dst\t! 
" %} 15319 ins_encode %{ 15320 int vector_len = 1; 15321 __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15322 __ vblendvpd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $dst$$XMMRegister, vector_len); 15323 %} 15324 ins_pipe( pipe_slow ); 15325 %} 15326 15327 instruct max4L_reg_evex(vecY dst, vecY src1, vecY src2) %{ 15328 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15329 match(Set dst (MaxV src1 src2)); 15330 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 15331 ins_encode %{ 15332 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 1); 15333 %} 15334 ins_pipe( pipe_slow ); 15335 %} 15336 15337 instruct max8L_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15338 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 15339 match(Set dst (MaxV src1 src2)); 15340 format %{ "vpmaxsq $dst,$src1,src2\t! " %} 15341 ins_encode %{ 15342 __ vpmaxsq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 2); 15343 %} 15344 ins_pipe( pipe_slow ); 15345 %} 15346 15347 // Float Vector Max 15348 instruct max2F_reg(vecD dst, vecD src1, vecD src2) %{ 15349 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15350 match(Set dst (MaxV src1 src2)); 15351 effect(TEMP dst); 15352 format %{ "movsd $dst,$src1\n\t" 15353 "maxps $dst,$src2\t! 
" %} 15354 ins_encode %{ 15355 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 15356 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 15357 %} 15358 ins_pipe( pipe_slow ); 15359 %} 15360 15361 instruct max2F_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15362 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15363 match(Set dst (MaxV src1 src2)); 15364 format %{ "vmaxps $dst,$src1,$src2\t! " %} 15365 ins_encode %{ 15366 int vector_len = 0; 15367 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15368 %} 15369 ins_pipe( pipe_slow ); 15370 %} 15371 15372 instruct max4F_reg(vecX dst, vecX src1, vecX src2) %{ 15373 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15374 match(Set dst (MaxV src1 src2)); 15375 effect(TEMP dst); 15376 format %{ "movdqu $dst,$src1\n\t" 15377 "maxps $dst,$src2\t! " %} 15378 ins_encode %{ 15379 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15380 __ maxps($dst$$XMMRegister, $src2$$XMMRegister); 15381 %} 15382 ins_pipe( pipe_slow ); 15383 %} 15384 15385 instruct max4F_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15386 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15387 match(Set dst (MaxV src1 src2)); 15388 format %{ "vmaxps $dst,$src1,$src2\t! " %} 15389 ins_encode %{ 15390 int vector_len = 0; 15391 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15392 %} 15393 ins_pipe( pipe_slow ); 15394 %} 15395 15396 instruct max4F_reg_evex(vecX dst, vecX src1, vecX src2) %{ 15397 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15398 match(Set dst (MaxV src1 src2)); 15399 format %{ "vmaxps $dst,$src1,$src2\t! 
" %} 15400 ins_encode %{ 15401 int vector_len = 0; 15402 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15403 %} 15404 ins_pipe( pipe_slow ); 15405 %} 15406 15407 instruct max8F_reg_avx(vecY dst, vecY src1, vecY src2) %{ 15408 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15409 match(Set dst (MaxV src1 src2)); 15410 format %{ "vmaxps $dst,$src1,$src2\t! " %} 15411 ins_encode %{ 15412 int vector_len = 1; 15413 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15414 %} 15415 ins_pipe( pipe_slow ); 15416 %} 15417 15418 instruct max8F_reg_evex(vecY dst, vecY src1, vecY src2) %{ 15419 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15420 match(Set dst (MaxV src1 src2)); 15421 format %{ "vmaxps $dst,$src1,$src2\t! " %} 15422 ins_encode %{ 15423 int vector_len = 1; 15424 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15425 %} 15426 ins_pipe( pipe_slow ); 15427 %} 15428 15429 instruct max16F_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15430 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 15431 match(Set dst (MaxV src1 src2)); 15432 format %{ "vmaxps $dst,$src1,$src2\t! 
" %} 15433 ins_encode %{ 15434 int vector_len = 2; 15435 __ vmaxps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15436 %} 15437 ins_pipe( pipe_slow ); 15438 %} 15439 15440 // Double Vector Max 15441 instruct maxD_reg(vecD dst, vecD src1, vecD src2) %{ 15442 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15443 match(Set dst (MaxV src1 src2)); 15444 effect(TEMP dst); 15445 format %{ "movsd $dst,$src1\n\t" 15446 "maxpd $dst,$src2\t! " %} 15447 ins_encode %{ 15448 __ movsd($dst$$XMMRegister, $src1$$XMMRegister); 15449 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 15450 %} 15451 ins_pipe( pipe_slow ); 15452 %} 15453 15454 instruct max1D_reg_avx(vecD dst, vecD src1, vecD src2) %{ 15455 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15456 match(Set dst (MaxV src1 src2)); 15457 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 15458 ins_encode %{ 15459 int vector_len = 0; 15460 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15461 %} 15462 ins_pipe( pipe_slow ); 15463 %} 15464 15465 instruct max2D_reg(vecX dst, vecX src1, vecX src2) %{ 15466 predicate(UseSSE > 0 && UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15467 match(Set dst (MaxV src1 src2)); 15468 effect(TEMP dst); 15469 format %{ "movdqu $dst,$src1\n\t" 15470 "maxpd $dst,$src2\t! " %} 15471 ins_encode %{ 15472 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 15473 __ maxpd($dst$$XMMRegister, $src2$$XMMRegister); 15474 %} 15475 ins_pipe( pipe_slow ); 15476 %} 15477 15478 instruct max2D_reg_avx(vecX dst, vecX src1, vecX src2) %{ 15479 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15480 match(Set dst (MaxV src1 src2)); 15481 format %{ "vmaxpd $dst,$src1,$src2\t! 
" %} 15482 ins_encode %{ 15483 int vector_len = 0; 15484 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15485 %} 15486 ins_pipe( pipe_slow ); 15487 %} 15488 15489 instruct max2D_reg_evex(vecX dst, vecX src1, vecX src2) %{ 15490 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15491 match(Set dst (MaxV src1 src2)); 15492 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 15493 ins_encode %{ 15494 int vector_len = 0; 15495 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15496 %} 15497 ins_pipe( pipe_slow ); 15498 %} 15499 15500 instruct max4D_reg_avx(vecY dst, vecY src1, vecY src2) %{ 15501 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15502 match(Set dst (MaxV src1 src2)); 15503 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 15504 ins_encode %{ 15505 int vector_len = 1; 15506 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15507 %} 15508 ins_pipe( pipe_slow ); 15509 %} 15510 15511 instruct max4D_reg_evex(vecY dst, vecY src1, vecY src2) %{ 15512 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15513 match(Set dst (MaxV src1 src2)); 15514 format %{ "vmaxpd $dst,$src1,$src2\t! " %} 15515 ins_encode %{ 15516 int vector_len = 1; 15517 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15518 %} 15519 ins_pipe( pipe_slow ); 15520 %} 15521 15522 instruct max8D_reg_evex(vecZ dst, vecZ src1, vecZ src2) %{ 15523 predicate(UseAVX > 2 && VM_Version::supports_avx512vl() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 15524 match(Set dst (MaxV src1 src2)); 15525 format %{ "vmaxpd $dst,$src1,$src2\t! 
" %} 15526 ins_encode %{ 15527 int vector_len = 2; 15528 __ vmaxpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 15529 %} 15530 ins_pipe( pipe_slow ); 15531 %} 15532 15533 // ------------------------------ Shift --------------------------------------- 15534 15535 // Left and right shift count vectors are the same on x86 15536 // (only lowest bits of xmm reg are used for count). 15537 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 15538 match(Set dst (LShiftCntV cnt)); 15539 match(Set dst (RShiftCntV cnt)); 15540 format %{ "movd $dst,$cnt\t! load shift count" %} 15541 ins_encode %{ 15542 __ movdl($dst$$XMMRegister, $cnt$$Register); 15543 %} 15544 ins_pipe( pipe_slow ); 15545 %} 15546 15547 // --------------------------------- Sqrt -------------------------------------- 15548 15549 // Floating point vector sqrt 15550 instruct vsqrt2D_reg(vecX dst, vecX src) %{ 15551 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 15552 match(Set dst (SqrtVD src)); 15553 format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %} 15554 ins_encode %{ 15555 int vector_len = 0; 15556 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 15557 %} 15558 ins_pipe( pipe_slow ); 15559 %} 15560 15561 instruct vsqrt2D_mem(vecX dst, memory mem) %{ 15562 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 15563 match(Set dst (SqrtVD (LoadVector mem))); 15564 format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %} 15565 ins_encode %{ 15566 int vector_len = 0; 15567 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len); 15568 %} 15569 ins_pipe( pipe_slow ); 15570 %} 15571 15572 instruct vsqrt4D_reg(vecY dst, vecY src) %{ 15573 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 15574 match(Set dst (SqrtVD src)); 15575 format %{ "vsqrtpd $dst,$src\t! 
sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE: throughout this section, vector_len selects the encoded vector width:
// 0 = 128-bit (vecS/vecD/vecX operands), 1 = 256-bit (vecY), 2 = 512-bit (vecZ).

// Doubles vector sqrt, load-folded memory source.
instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sqrt.
instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift.
//
// Each size comes in several variants selected by CPU-feature predicates:
//   plain / _imm            : SSE (UseAVX == 0), destructive two-operand form.
//   _reg_avx / _reg_imm_avx : AVX without AVX-512 (supports_avxonly /
//                             supports_avx256only), three-operand form.
//   _reg_evex / _reg_imm_evex : AVX-512 with BW (supports_avx512bw).
//   _reg_evex_special / _reg_imm_evex_special : AVX-512 without BW
//     (supports_avx512nobw); note these match (LShiftVS dst shift) — dst is
//     read-modify-write — and declare effect(TEMP src) so src is only a temp.
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 256-bit short shifts: AVX2-but-not-EVEX uses supports_avx256only().
instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit short shifts require AVX512BW; no _special fallback exists here.
instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vsll2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI dst shift));
  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
  ins_encode %{
    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (LShiftVI src shift));
  format %{ "vpslld  $dst,$src,$shift\t! left shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector left shift
instruct vsll2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL dst shift));
  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
  ins_encode %{
    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (LShiftVL src shift));
  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ----------------------- LogicalRightShift -----------------------------------

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int
// with sign extension before a shift. But char vectors are fine since chars
// are unsigned values.
16283 16284 instruct vsrl2S(vecS dst, vecS shift) %{ 16285 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 16286 match(Set dst (URShiftVS dst shift)); 16287 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 16288 ins_encode %{ 16289 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 16290 %} 16291 ins_pipe( pipe_slow ); 16292 %} 16293 16294 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 16295 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 16296 match(Set dst (URShiftVS dst shift)); 16297 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 16298 ins_encode %{ 16299 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 16300 %} 16301 ins_pipe( pipe_slow ); 16302 %} 16303 16304 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 16305 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 16306 match(Set dst (URShiftVS src shift)); 16307 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 16308 ins_encode %{ 16309 int vector_len = 0; 16310 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16311 %} 16312 ins_pipe( pipe_slow ); 16313 %} 16314 16315 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 16316 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 16317 match(Set dst (URShiftVS src shift)); 16318 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 16319 ins_encode %{ 16320 int vector_len = 0; 16321 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16322 %} 16323 ins_pipe( pipe_slow ); 16324 %} 16325 16326 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 16327 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 16328 match(Set dst (URShiftVS dst shift)); 16329 effect(TEMP src); 16330 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed2S" %} 16331 ins_encode %{ 16332 int vector_len = 0; 16333 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16334 %} 16335 ins_pipe( pipe_slow ); 16336 %} 16337 16338 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 16339 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 16340 match(Set dst (URShiftVS src shift)); 16341 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 16342 ins_encode %{ 16343 int vector_len = 0; 16344 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16345 %} 16346 ins_pipe( pipe_slow ); 16347 %} 16348 16349 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 16350 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 16351 match(Set dst (URShiftVS src shift)); 16352 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 16353 ins_encode %{ 16354 int vector_len = 0; 16355 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16356 %} 16357 ins_pipe( pipe_slow ); 16358 %} 16359 16360 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 16361 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 16362 match(Set dst (URShiftVS dst shift)); 16363 effect(TEMP src); 16364 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 16365 ins_encode %{ 16366 int vector_len = 0; 16367 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16368 %} 16369 ins_pipe( pipe_slow ); 16370 %} 16371 16372 instruct vsrl4S(vecD dst, vecS shift) %{ 16373 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 16374 match(Set dst (URShiftVS dst shift)); 16375 format %{ "psrlw $dst,$shift\t! 
logical right shift packed4S" %} 16376 ins_encode %{ 16377 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 16378 %} 16379 ins_pipe( pipe_slow ); 16380 %} 16381 16382 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 16383 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 16384 match(Set dst (URShiftVS dst shift)); 16385 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 16386 ins_encode %{ 16387 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 16388 %} 16389 ins_pipe( pipe_slow ); 16390 %} 16391 16392 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 16393 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 16394 match(Set dst (URShiftVS src shift)); 16395 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 16396 ins_encode %{ 16397 int vector_len = 0; 16398 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16399 %} 16400 ins_pipe( pipe_slow ); 16401 %} 16402 16403 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 16404 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 16405 match(Set dst (URShiftVS src shift)); 16406 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 16407 ins_encode %{ 16408 int vector_len = 0; 16409 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16410 %} 16411 ins_pipe( pipe_slow ); 16412 %} 16413 16414 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 16415 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 16416 match(Set dst (URShiftVS dst shift)); 16417 effect(TEMP src); 16418 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} 16419 ins_encode %{ 16420 int vector_len = 0; 16421 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16422 %} 16423 ins_pipe( pipe_slow ); 16424 %} 16425 16426 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 16427 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 16428 match(Set dst (URShiftVS src shift)); 16429 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 16430 ins_encode %{ 16431 int vector_len = 0; 16432 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16433 %} 16434 ins_pipe( pipe_slow ); 16435 %} 16436 16437 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 16438 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 16439 match(Set dst (URShiftVS src shift)); 16440 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 16441 ins_encode %{ 16442 int vector_len = 0; 16443 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16444 %} 16445 ins_pipe( pipe_slow ); 16446 %} 16447 16448 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 16449 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 16450 match(Set dst (URShiftVS dst shift)); 16451 effect(TEMP src); 16452 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 16453 ins_encode %{ 16454 int vector_len = 0; 16455 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 16456 %} 16457 ins_pipe( pipe_slow ); 16458 %} 16459 16460 instruct vsrl8S(vecX dst, vecS shift) %{ 16461 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 16462 match(Set dst (URShiftVS dst shift)); 16463 format %{ "psrlw $dst,$shift\t! 
logical right shift packed8S" %} 16464 ins_encode %{ 16465 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 16466 %} 16467 ins_pipe( pipe_slow ); 16468 %} 16469 16470 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 16471 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 16472 match(Set dst (URShiftVS dst shift)); 16473 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 16474 ins_encode %{ 16475 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 16476 %} 16477 ins_pipe( pipe_slow ); 16478 %} 16479 16480 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 16481 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 16482 match(Set dst (URShiftVS src shift)); 16483 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 16484 ins_encode %{ 16485 int vector_len = 0; 16486 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16487 %} 16488 ins_pipe( pipe_slow ); 16489 %} 16490 16491 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 16492 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 16493 match(Set dst (URShiftVS src shift)); 16494 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 16495 ins_encode %{ 16496 int vector_len = 0; 16497 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 16498 %} 16499 ins_pipe( pipe_slow ); 16500 %} 16501 16502 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 16503 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 16504 match(Set dst (URShiftVS dst shift)); 16505 effect(TEMP src); 16506 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8-short logical right shift by immediate — VEX-only CPUs.
instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same, AVX-512 with BW.
instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place form; $src is TEMP only.
instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 16-short (256-bit, vecY) logical right shift; AVX2-without-EVEX CPUs.
instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 16-short logical right shift, AVX-512 with BW; vector_len 1 matches the 256-bit vecY operands.
instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place form; $src is TEMP only.
instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant for AVX2-without-EVEX.
instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant for AVX-512 with BW.
instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place immediate form; $src is TEMP only.
instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 32-short (512-bit, vecZ) shifts require AVX-512 BW; vector_len 2 matches vecZ.
instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (URShiftVS src shift));
  format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector logical right shift
// NOTE(review): unlike the short variants, the int family has no UseAVX == 0 guard
// on the two-operand forms; presumably the AVX three-operand rules win by predicate
// specificity when AVX is on — confirm against the matcher's rule-ordering.
instruct vsrl2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2-int logical right shift by immediate; in-place two-operand SSE form.
instruct vsrl2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand AVX form (any UseAVX > 0).
instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4-int (128-bit, vecX) variants of the same pattern.
instruct vsrl4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI dst shift));
  format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %}
  ins_encode %{
    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand AVX forms for 4 ints (128-bit).
instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (256-bit, vecY) need AVX2 (UseAVX > 1).
instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 16 ints (512-bit, vecZ) need AVX-512 (UseAVX > 2).
instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (URShiftVI src shift));
  format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector logical right shift
// 2 longs fit a 128-bit vecX; two-operand in-place SSE forms first.
instruct vsrl2L(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL dst shift));
  format %{ "psrlq $dst,$shift\t! 
logical right shift packed2L" %}
  ins_encode %{
    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand AVX forms for 2 longs (128-bit).
instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4 longs (256-bit, vecY) need AVX2.
instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8 longs (512-bit, vecZ) need AVX-512 (UseAVX > 2).
instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (URShiftVL src shift));
  format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------

// Shorts/Chars vector arithmetic right shift
instruct vsra2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): unlike vsra2S above (and vsra4S_imm/vsra8S_imm below) this
// predicate has no UseAVX == 0 guard — confirm this asymmetry is intentional.
instruct vsra2S_imm(vecS dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 2-short arithmetic right shift, VEX-only CPUs (AVX without EVEX).
instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same, AVX-512 with BW.
instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place form; $src is a matcher-required TEMP only.
instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant, VEX-only CPUs.
instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant, AVX-512 with BW.
instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place immediate form; $src is TEMP only.
instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4-short arithmetic right shift, SSE only; in-place.
instruct vsra4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed4S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 4-short arithmetic right shift, VEX-only CPUs.
instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same, AVX-512 with BW.
instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place form; $src is TEMP only.
instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant, VEX-only CPUs.
instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant, AVX-512 with BW.
instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place immediate form; $src is TEMP only.
instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8-short arithmetic right shift, SSE only; in-place.
instruct vsra8S(vecX dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8S_imm(vecX dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %}
  ins_encode %{
    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// 8-short arithmetic right shift, VEX-only CPUs.
instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same, AVX-512 with BW.
instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place form; $src is TEMP only.
instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant, VEX-only CPUs.
instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant, AVX-512 with BW.
instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place immediate form; $src is TEMP only.
instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 16-short (256-bit, vecY) arithmetic right shift, AVX2-without-EVEX CPUs.
instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same, AVX-512 with BW.
instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place form; $src is a matcher-required TEMP only.
instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): this uses supports_avxonly() while vsra16S_reg_avx (and the
// vsrl16S imm variant) use supports_avx256only() for the same 16-element size —
// confirm whether the weaker guard is intentional.
instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Immediate-count variant, AVX-512 with BW.
instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 without BW: in-place immediate form; $src is TEMP only.
instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 32-short (512-bit, vecZ) arithmetic right shift requires AVX-512 BW.
instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (RShiftVS src shift));
  format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector arithmetic right shift
// Two-operand in-place SSE forms (no UseAVX guard, as in the logical-shift int family).
instruct vsra2I(vecD dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_imm(vecD dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! 
arithmetic right shift packed2I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand AVX forms for 2 ints.
instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 4-int (128-bit, vecX) in-place SSE forms.
instruct vsra4I(vecX dst, vecS shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_imm(vecX dst, immI8 shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI dst shift));
  format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Three-operand AVX form for 4 ints.
instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8 ints (256-bit, vecY) need AVX2 (UseAVX > 1).
instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 16 ints (512-bit, vecZ) need AVX-512 (UseAVX > 2).
instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (RShiftVI src shift));
  format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed16I" %} 17322 ins_encode %{ 17323 int vector_len = 2; 17324 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17325 %} 17326 ins_pipe( pipe_slow ); 17327 %} 17328 17329 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 17330 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 17331 match(Set dst (RShiftVI src shift)); 17332 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 17333 ins_encode %{ 17334 int vector_len = 2; 17335 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17336 %} 17337 ins_pipe( pipe_slow ); 17338 %} 17339 17340 // Long vector arithmetic right shift 17341 instruct vsra1L(vecD dst, vecD src, vecS shift, vecD tmp) %{ 17342 predicate(n->as_Vector()->length() == 1); 17343 match(Set dst (RShiftVL src shift)); 17344 effect(TEMP dst, TEMP tmp); 17345 format %{ "movdqu $dst,$src\n\t" 17346 "psrlq $dst,$shift\n\t" 17347 "movdqu $tmp,[0x8000000000000000]\n\t" 17348 "psrlq $tmp,$shift\n\t" 17349 "pxor $dst,$tmp\n\t" 17350 "psubq $dst,$tmp\t! arithmetic right shift packed1L" %} 17351 ins_encode %{ 17352 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 17353 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 17354 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); 17355 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 17356 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 17357 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 17358 %} 17359 ins_pipe( pipe_slow ); 17360 %} 17361 17362 instruct vsra1L_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{ 17363 predicate(n->as_Vector()->length() == 1); 17364 match(Set dst (RShiftVL src shift)); 17365 effect(TEMP dst, TEMP tmp); 17366 format %{ "movdqu $dst,$src\n\t" 17367 "psrlq $dst,$shift\n\t" 17368 "movdqu $tmp,[0x8000000000000000]\n\t" 17369 "psrlq $tmp,$shift\n\t" 17370 "pxor $dst,$tmp\n\t" 17371 "psubq $dst,$tmp\t!
arithmetic right shift packed1L" %} 17372 ins_encode %{ 17373 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 17374 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 17375 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); 17376 __ psrlq($tmp$$XMMRegister, (int)$shift$$constant); 17377 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 17378 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 17379 %} 17380 ins_pipe( pipe_slow ); 17381 %} 17382 17383 instruct vsra1L_reg(vecD dst, vecD src, vecS shift, vecD tmp) %{ 17384 predicate(UseAVX > 0 && n->as_Vector()->length() == 1); 17385 match(Set dst (RShiftVL src shift)); 17386 effect(TEMP dst, TEMP tmp); 17387 format %{ "vpsrlq $dst,$src,$shift\n\t" 17388 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17389 "vpsrlq $tmp,$tmp,$shift\n\t" 17390 "vpxor $dst,$dst,$tmp\n\t" 17391 "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed1L" %} 17392 ins_encode %{ 17393 int vector_len = 0; 17394 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17395 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); 17396 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17397 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17398 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17399 %} 17400 ins_pipe( pipe_slow ); 17401 %} 17402 17403 instruct vsra1L_reg_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{ 17404 predicate(UseAVX > 0 && n->as_Vector()->length() == 1); 17405 match(Set dst (RShiftVL src shift)); 17406 effect(TEMP dst, TEMP tmp); 17407 format %{ "vpsrlq $dst,$src,$shift\n\t" 17408 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17409 "vpsrlq $tmp,$tmp,$shift\n\t" 17410 "vpxor $dst,$dst,$tmp\n\t" 17411 "vpsubq $dst,$dst,$tmp\t!
arithmetic right shift packed1L" %} 17412 ins_encode %{ 17413 int vector_len = 0; 17414 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17415 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); 17416 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len); 17417 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17418 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17419 %} 17420 ins_pipe( pipe_slow ); 17421 %} 17422 17423 instruct vsra1L_reg_evex(vecD dst, vecD src, vecS shift) %{ 17424 predicate(UseAVX > 2 && n->as_Vector()->length() == 1); 17425 match(Set dst (RShiftVL src shift)); 17426 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed1L" %} 17427 ins_encode %{ 17428 int vector_len = 0; 17429 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17430 %} 17431 ins_pipe( pipe_slow ); 17432 %} 17433 17434 instruct vsra2L_reg_imm(vecX dst, vecX src, immI8 shift, vecX tmp) %{ 17435 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 17436 match(Set dst (RShiftVL src shift)); 17437 effect(TEMP dst, TEMP tmp); 17438 format %{ "vpsrlq $dst,$src,$shift\n\t" 17439 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17440 "vpsrlq $tmp,$tmp,$shift\n\t" 17441 "vpxor $dst,$dst,$tmp\n\t" 17442 "vpsubq $dst,$dst,$tmp\t!
arithmetic right shift packed2L" %} 17443 ins_encode %{ 17444 int vector_len = 0; 17445 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17446 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17447 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len); 17448 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17449 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17450 %} 17451 ins_pipe( pipe_slow ); 17452 %} 17453 17454 instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp) %{ 17455 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 17456 match(Set dst (RShiftVL src shift)); 17457 effect(TEMP dst, TEMP tmp); 17458 format %{ "vpsrlq $dst,$src,$shift\n\t" 17459 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17460 "vpsrlq $tmp,$tmp,$shift\n\t" 17461 "vpxor $dst,$dst,$tmp\n\t" 17462 "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed2L" %} 17463 ins_encode %{ 17464 int vector_len = 0; 17465 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17466 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17467 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17468 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17469 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17470 %} 17471 ins_pipe( pipe_slow ); 17472 %} 17473 17474 instruct vsra2L_reg_evex_imm(vecX dst, vecX src, immI8 shift) %{ 17475 predicate(UseAVX > 2 && n->as_Vector()->length() == 2); 17476 match(Set dst (RShiftVL src shift)); 17477 format %{ "evpsraq $dst,$src,$shift\t! 
arithmetic right shift packed2L" %} 17478 ins_encode %{ 17479 int vector_len = 0; 17480 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17481 %} 17482 ins_pipe( pipe_slow ); 17483 %} 17484 17485 instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{ 17486 predicate(UseAVX > 2 && n->as_Vector()->length() == 2); 17487 match(Set dst (RShiftVL src shift)); 17488 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %} 17489 ins_encode %{ 17490 int vector_len = 0; 17491 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17492 %} 17493 ins_pipe( pipe_slow ); 17494 %} 17495 17496 instruct vsra4L_reg_imm(vecY dst, vecY src, immI8 shift, vecY tmp) %{ 17497 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 17498 match(Set dst (RShiftVL src shift)); 17499 effect(TEMP dst, TEMP tmp); 17500 format %{ "vpsrlq $dst,$src,$shift\n\t" 17501 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17502 "vpsrlq $tmp,$tmp,$shift\n\t" 17503 "vpxor $dst,$dst,$tmp\n\t" 17504 "vpsubq $dst,$dst,$tmp\t! 
arithmetic right shift packed4L" %} 17505 ins_encode %{ 17506 int vector_len = 1; 17507 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17508 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17509 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len); 17510 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17511 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17512 %} 17513 ins_pipe( pipe_slow ); 17514 %} 17515 17516 instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp) %{ 17517 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 17518 match(Set dst (RShiftVL src shift)); 17519 effect(TEMP dst, TEMP tmp); 17520 format %{ "vpsrlq $dst,$src,$shift\n\t" 17521 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17522 "vpsrlq $tmp,$tmp,$shift\n\t" 17523 "vpxor $dst,$dst,$tmp\n\t" 17524 "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %} 17525 ins_encode %{ 17526 int vector_len = 1; 17527 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17528 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask())); 17529 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17530 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17531 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17532 %} 17533 ins_pipe( pipe_slow ); 17534 %} 17535 17536 instruct vsra4L_reg_evex_imm(vecY dst, vecY src, immI8 shift) %{ 17537 predicate(UseAVX > 2 && n->as_Vector()->length() == 4); 17538 match(Set dst (RShiftVL src shift)); 17539 format %{ "evpsraq $dst,$src,$shift\t! 
arithmetic right shift packed4L" %} 17540 ins_encode %{ 17541 int vector_len = 1; 17542 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17543 %} 17544 ins_pipe( pipe_slow ); 17545 %} 17546 17547 instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{ 17548 predicate(UseAVX > 2 && n->as_Vector()->length() == 4); 17549 match(Set dst (RShiftVL src shift)); 17550 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %} 17551 ins_encode %{ 17552 int vector_len = 1; 17553 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17554 %} 17555 ins_pipe( pipe_slow ); 17556 %} 17557 17558 instruct vsra8L_reg_evex_imm(vecZ dst, vecZ src, immI8 shift) %{ 17559 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 17560 match(Set dst (RShiftVL src shift)); 17561 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed8L" %} 17562 ins_encode %{ 17563 int vector_len = 2; 17564 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 17565 %} 17566 ins_pipe( pipe_slow ); 17567 %} 17568 17569 instruct vsra8L_reg_evex(vecZ dst, vecZ src, vecS shift) %{ 17570 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 17571 match(Set dst (RShiftVL src shift)); 17572 format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed8L" %} 17573 ins_encode %{ 17574 int vector_len = 2; 17575 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17576 %} 17577 ins_pipe( pipe_slow ); 17578 %} 17579 17580 // ------------------- Variable Bit Shift Left Logical ----------------------------- 17581 //Integer Variable left shift 17582 instruct vsllv2I(vecD dst, vecD src, vecD shift) %{ 17583 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); 17584 match(Set dst (LShiftVI src shift)); 17585 format %{ "vpsllvd $dst,$src,$shift\t!
variable bit shift left shift packed2I" %} 17586 ins_encode %{ 17587 int vector_len = 0; 17588 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17589 %} 17590 ins_pipe( pipe_slow ); 17591 %} 17592 17593 instruct vsllv4I_reg(vecX dst, vecX src, vecX shift) %{ 17594 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); 17595 match(Set dst (LShiftVI src shift)); 17596 format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed4I" %} 17597 ins_encode %{ 17598 int vector_len = 0; 17599 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17600 %} 17601 ins_pipe( pipe_slow ); 17602 %} 17603 17604 instruct vsllv4I_reg_evex(vecX dst, vecX src, vecX shift) %{ 17605 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); 17606 match(Set dst (LShiftVI src shift)); 17607 format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed4I" %} 17608 ins_encode %{ 17609 int vector_len = 0; 17610 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17611 %} 17612 ins_pipe( pipe_slow ); 17613 %} 17614 17615 instruct vsllv8I_reg(vecY dst, vecY src, vecY shift) %{ 17616 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); 17617 match(Set dst (LShiftVI src shift)); 17618 format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed8I" %} 17619 ins_encode %{ 17620 int vector_len = 1; 17621 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17622 %} 17623 ins_pipe( pipe_slow ); 17624 %} 17625 17626 instruct vsllv8I_reg_evex(vecY dst, vecY src, vecY shift) %{ 17627 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); 17628 match(Set dst (LShiftVI src shift)); 17629 format %{ "vpsllvd $dst,$src,$shift\t! 
variable bit shift left shift packed8I" %} 17630 ins_encode %{ 17631 int vector_len = 1; 17632 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17633 %} 17634 ins_pipe( pipe_slow ); 17635 %} 17636 17637 instruct vsllv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 17638 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_LShiftCntV); 17639 match(Set dst (LShiftVI src shift)); 17640 format %{ "vpsllvd $dst,$src,$shift\t! variable bit shift left shift packed16I" %} 17641 ins_encode %{ 17642 int vector_len = 2; 17643 __ vpsllvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17644 %} 17645 ins_pipe( pipe_slow ); 17646 %} 17647 17648 //Long Variable left shift 17649 instruct vsllv1L_reg(vecD dst, vecD src, vecD shift) %{ 17650 predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_LShiftCntV); 17651 match(Set dst (LShiftVL src shift)); 17652 format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed1L" %} 17653 ins_encode %{ 17654 int vector_len = 0; 17655 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17656 %} 17657 ins_pipe( pipe_slow ); 17658 %} 17659 17660 instruct vsllv2L_reg(vecX dst, vecX src, vecX shift) %{ 17661 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); 17662 match(Set dst (LShiftVL src shift)); 17663 format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed2L" %} 17664 ins_encode %{ 17665 int vector_len = 0; 17666 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17667 %} 17668 ins_pipe( pipe_slow ); 17669 %} 17670 17671 instruct vsllv2L_reg_evex(vecX dst, vecX src, vecX shift) %{ 17672 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_LShiftCntV); 17673 match(Set dst (LShiftVL src shift)); 17674 format %{ "vpsllvq $dst,$src,$shift\t! 
variable bit shift left shift packed2L" %} 17675 ins_encode %{ 17676 int vector_len = 0; 17677 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17678 %} 17679 ins_pipe( pipe_slow ); 17680 %} 17681 17682 instruct vsllv4L_reg(vecY dst, vecY src, vecY shift) %{ 17683 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); 17684 match(Set dst (LShiftVL src shift)); 17685 format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed4L" %} 17686 ins_encode %{ 17687 int vector_len = 1; 17688 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17689 %} 17690 ins_pipe( pipe_slow ); 17691 %} 17692 17693 instruct vsllv4L_reg_evex(vecY dst, vecY src, vecY shift) %{ 17694 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_LShiftCntV); 17695 match(Set dst (LShiftVL src shift)); 17696 format %{ "vpsllvq $dst,$src,$shift\t! variable bit shift left shift packed4L" %} 17697 ins_encode %{ 17698 int vector_len = 1; 17699 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17700 %} 17701 ins_pipe( pipe_slow ); 17702 %} 17703 17704 instruct vsllv8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 17705 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_LShiftCntV); 17706 match(Set dst (LShiftVL src shift)); 17707 format %{ "vpsllvq $dst,$src,$shift\t!
variable bit shift left shift packed8L" %} 17708 ins_encode %{ 17709 int vector_len = 2; 17710 __ vpsllvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17711 %} 17712 ins_pipe( pipe_slow ); 17713 %} 17714 17715 // ------------------- Variable Bit Shift Right Logical ----------------------------- 17716 //Integer Variable right shift 17717 instruct vsrlv2I_reg(vecD dst, vecD src, vecD shift) %{ 17718 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17719 match(Set dst (URShiftVI src shift)); 17720 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed2I" %} 17721 ins_encode %{ 17722 int vector_len = 0; 17723 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17724 %} 17725 ins_pipe( pipe_slow ); 17726 %} 17727 17728 instruct vsrlv4I_reg(vecX dst, vecX src, vecX shift) %{ 17729 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17730 match(Set dst (URShiftVI src shift)); 17731 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed4I" %} 17732 ins_encode %{ 17733 int vector_len = 0; 17734 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17735 %} 17736 ins_pipe( pipe_slow ); 17737 %} 17738 17739 instruct vsrlv4I_reg_evex(vecX dst, vecX src, vecX shift) %{ 17740 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17741 match(Set dst (URShiftVI src shift)); 17742 format %{ "vpsrlvd $dst,$src,$shift\t!
variable bit shift right shift packed4I" %} 17743 ins_encode %{ 17744 int vector_len = 0; 17745 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17746 %} 17747 ins_pipe( pipe_slow ); 17748 %} 17749 17750 instruct vsrlv8I_reg(vecY dst, vecY src, vecY shift) %{ 17751 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17752 match(Set dst (URShiftVI src shift)); 17753 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed8I" %} 17754 ins_encode %{ 17755 int vector_len = 1; 17756 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17757 %} 17758 ins_pipe( pipe_slow ); 17759 %} 17760 17761 instruct vsrlv8I_reg_evex(vecY dst, vecY src, vecY shift) %{ 17762 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17763 match(Set dst (URShiftVI src shift)); 17764 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed8I" %} 17765 ins_encode %{ 17766 int vector_len = 1; 17767 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17768 %} 17769 ins_pipe( pipe_slow ); 17770 %} 17771 17772 instruct vsrlv16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 17773 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV); 17774 match(Set dst (URShiftVI src shift)); 17775 format %{ "vpsrlvd $dst,$src,$shift\t! variable bit shift right shift packed16I" %} 17776 ins_encode %{ 17777 int vector_len = 2; 17778 __ vpsrlvd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17779 %} 17780 ins_pipe( pipe_slow ); 17781 %} 17782 17783 //Long Variable right shift 17784 instruct vsrlv1L_reg(vecD dst, vecD src, vecD shift) %{ 17785 predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); 17786 match(Set dst (URShiftVL src shift)); 17787 format %{ "vpsrlvq $dst,$src,$shift\t! 
variable bit shift right shift packed1L" %} 17788 ins_encode %{ 17789 int vector_len = 0; 17790 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17791 %} 17792 ins_pipe( pipe_slow ); 17793 %} 17794 17795 instruct vsrlv2L_reg(vecX dst, vecX src, vecX shift) %{ 17796 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17797 match(Set dst (URShiftVL src shift)); 17798 format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed2L" %} 17799 ins_encode %{ 17800 int vector_len = 0; 17801 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17802 %} 17803 ins_pipe( pipe_slow ); 17804 %} 17805 17806 instruct vsrlv2L_reg_evex(vecX dst, vecX src, vecX shift) %{ 17807 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17808 match(Set dst (URShiftVL src shift)); 17809 format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed2L" %} 17810 ins_encode %{ 17811 int vector_len = 0; 17812 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17813 %} 17814 ins_pipe( pipe_slow ); 17815 %} 17816 17817 instruct vsrlv4L_reg(vecY dst, vecY src, vecY shift) %{ 17818 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17819 match(Set dst (URShiftVL src shift)); 17820 format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed4L" %} 17821 ins_encode %{ 17822 int vector_len = 1; 17823 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17824 %} 17825 ins_pipe( pipe_slow ); 17826 %} 17827 17828 instruct vsrlv4L_reg_evex(vecY dst, vecY src, vecY shift) %{ 17829 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17830 match(Set dst (URShiftVL src shift)); 17831 format %{ "vpsrlvq $dst,$src,$shift\t!
variable bit shift right shift packed4L" %} 17832 ins_encode %{ 17833 int vector_len = 1; 17834 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17835 %} 17836 ins_pipe( pipe_slow ); 17837 %} 17838 17839 instruct vsrlv8L_reg(vecZ dst, vecZ src, vecZ shift) %{ 17840 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17841 match(Set dst (URShiftVL src shift)); 17842 format %{ "vpsrlvq $dst,$src,$shift\t! variable bit shift right shift packed8L" %} 17843 ins_encode %{ 17844 int vector_len = 2; 17845 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17846 %} 17847 ins_pipe( pipe_slow ); 17848 %} 17849 17850 // ------------------- Variable Bit Shift Right Arithmetic ----------------------------- 17851 //Integer Variable right shift 17852 instruct vsrav2I_reg(vecD dst, vecD src, vecD shift) %{ 17853 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17854 match(Set dst (RShiftVI src shift)); 17855 format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed2I" %} 17856 ins_encode %{ 17857 int vector_len = 0; 17858 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17859 %} 17860 ins_pipe( pipe_slow ); 17861 %} 17862 17863 instruct vsrav4I_reg(vecX dst, vecX src, vecX shift) %{ 17864 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17865 match(Set dst (RShiftVI src shift)); 17866 format %{ "vpsravd $dst,$src,$shift\t!
variable bit shift right shift packed4I" %} 17867 ins_encode %{ 17868 int vector_len = 0; 17869 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17870 %} 17871 ins_pipe( pipe_slow ); 17872 %} 17873 17874 instruct vsrav4I_reg_evex(vecX dst, vecX src, vecX shift) %{ 17875 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17876 match(Set dst (RShiftVI src shift)); 17877 format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed4I" %} 17878 ins_encode %{ 17879 int vector_len = 0; 17880 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17881 %} 17882 ins_pipe( pipe_slow ); 17883 %} 17884 17885 instruct vsrav8I_reg(vecY dst, vecY src, vecY shift) %{ 17886 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17887 match(Set dst (RShiftVI src shift)); 17888 format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed8I" %} 17889 ins_encode %{ 17890 int vector_len = 1; 17891 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17892 %} 17893 ins_pipe( pipe_slow ); 17894 %} 17895 17896 instruct vsrav8I_reg_evex(vecY dst, vecY src, vecY shift) %{ 17897 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 17898 match(Set dst (RShiftVI src shift)); 17899 format %{ "vpsravd $dst,$src,$shift\t! variable bit shift right shift packed8I" %} 17900 ins_encode %{ 17901 int vector_len = 1; 17902 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17903 %} 17904 ins_pipe( pipe_slow ); 17905 %} 17906 17907 instruct vsrav16I_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 17908 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->in(2)->Opcode() != Op_RShiftCntV); 17909 match(Set dst (RShiftVI src shift)); 17910 format %{ "vpsravd $dst,$src,$shift\t! 
variable bit shift right shift packed16I" %} 17911 ins_encode %{ 17912 int vector_len = 2; 17913 __ vpsravd($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17914 %} 17915 ins_pipe( pipe_slow ); 17916 %} 17917 17918 //Long Variable right shift arithmetic 17919 instruct vsrav1L_reg(vecD dst, vecD src, vecD shift, vecD tmp) %{ 17920 predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); 17921 match(Set dst (RShiftVL src shift)); 17922 effect(TEMP dst, TEMP tmp); 17923 format %{ "vpsrlvq $dst,$src,$shift\n\t" 17924 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17925 "vpsrlvq $tmp,$tmp,$shift\n\t" 17926 "vpxor $dst,$dst,$tmp\n\t" 17927 "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed1L" %} 17928 ins_encode %{ 17929 int vector_len = 0; 17930 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17931 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); 17932 __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17933 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17934 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17935 %} 17936 ins_pipe( pipe_slow ); 17937 %} 17938 17939 instruct vsrav1L_reg_evex(vecD dst, vecD src, vecD shift) %{ 17940 predicate(UseAVX > 2 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV); 17941 match(Set dst (RShiftVL src shift)); 17942 format %{ "evpsravq $dst,$src,$shift\t!
variable arithmetic right shift packed1L" %} 17943 ins_encode %{ 17944 int vector_len = 0; 17945 __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17946 %} 17947 ins_pipe( pipe_slow ); 17948 %} 17949 17950 instruct vsrav2L_reg(vecX dst, vecX src, vecX shift, vecX tmp) %{ 17951 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17952 match(Set dst (RShiftVL src shift)); 17953 effect(TEMP dst, TEMP tmp); 17954 format %{ "vpsrlvq $dst,$src,$shift\n\t" 17955 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17956 "vpsrlvq $tmp,$tmp,$shift\n\t" 17957 "vpxor $dst,$dst,$tmp\n\t" 17958 "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed2L" %} 17959 ins_encode %{ 17960 int vector_len = 0; 17961 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17962 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); 17963 __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17964 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17965 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17966 %} 17967 ins_pipe( pipe_slow ); 17968 %} 17969 17970 instruct vsrav2L_reg_evex(vecX dst, vecX src, vecX shift) %{ 17971 predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV); 17972 match(Set dst (RShiftVL src shift)); 17973 format %{ "evpsravq $dst,$src,$shift\t!
variable arithmetic right shift packed2L" %} 17974 ins_encode %{ 17975 int vector_len = 0; 17976 __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17977 %} 17978 ins_pipe( pipe_slow ); 17979 %} 17980 17981 instruct vsrav4L_reg(vecY dst, vecY src, vecY shift, vecY tmp) %{ 17982 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 17983 match(Set dst (RShiftVL src shift)); 17984 effect(TEMP dst, TEMP tmp); 17985 format %{ "vpsrlvq $dst,$src,$shift\n\t" 17986 "vmovdqu $tmp,[0x8000000000000000]\n\t" 17987 "vpsrlvq $tmp,$tmp,$shift\n\t" 17988 "vpxor $dst,$dst,$tmp\n\t" 17989 "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed4L" %} 17990 ins_encode %{ 17991 int vector_len = 1; 17992 __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 17993 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask())); 17994 __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len); 17995 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17996 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 17997 %} 17998 ins_pipe( pipe_slow ); 17999 %} 18000 18001 instruct vsrav4L_reg_evex(vecY dst, vecY src, vecY shift) %{ 18002 predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV); 18003 match(Set dst (RShiftVL src shift)); 18004 format %{ "evpsravq $dst,$src,$shift\t! variable bit shift right shift packed4L" %} 18005 ins_encode %{ 18006 int vector_len = 1; 18007 __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 18008 %} 18009 ins_pipe( pipe_slow ); 18010 %} 18011 18012 instruct vsrav8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{ 18013 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV); 18014 match(Set dst (RShiftVL src shift)); 18015 format %{ "evpsravq $dst,$src,$shift\t!
variable bit shift right shift packed8L" %} 18016 ins_encode %{ 18017 int vector_len = 2; 18018 __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 18019 %} 18020 ins_pipe( pipe_slow ); 18021 %} 18022 18023 // --------------------------------- AND -------------------------------------- 18024 18025 instruct vand4B(vecS dst, vecS src) %{ 18026 predicate(n->as_Vector()->length_in_bytes() == 4); 18027 match(Set dst (AndV dst src)); 18028 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 18029 ins_encode %{ 18030 __ pand($dst$$XMMRegister, $src$$XMMRegister); 18031 %} 18032 ins_pipe( pipe_slow ); 18033 %} 18034 18035 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 18036 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18037 match(Set dst (AndV src1 src2)); 18038 format %{ "vpand $dst,$src1,$src2\t! and vectors (4 bytes)" %} 18039 ins_encode %{ 18040 int vector_len = 0; 18041 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18042 %} 18043 ins_pipe( pipe_slow ); 18044 %} 18045 18046 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 18047 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18048 match(Set dst (AndV src (LoadVector mem))); 18049 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 18050 ins_encode %{ 18051 int vector_len = 0; 18052 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18053 %} 18054 ins_pipe( pipe_slow ); 18055 %} 18056 18057 instruct vand8B(vecD dst, vecD src) %{ 18058 predicate(n->as_Vector()->length_in_bytes() == 8); 18059 match(Set dst (AndV dst src)); 18060 format %{ "pand $dst,$src\t! 
and vectors (8 bytes)" %} 18061 ins_encode %{ 18062 __ pand($dst$$XMMRegister, $src$$XMMRegister); 18063 %} 18064 ins_pipe( pipe_slow ); 18065 %} 18066 18067 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 18068 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18069 match(Set dst (AndV src1 src2)); 18070 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 18071 ins_encode %{ 18072 int vector_len = 0; 18073 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18074 %} 18075 ins_pipe( pipe_slow ); 18076 %} 18077 18078 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 18079 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18080 match(Set dst (AndV src (LoadVector mem))); 18081 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 18082 ins_encode %{ 18083 int vector_len = 0; 18084 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18085 %} 18086 ins_pipe( pipe_slow ); 18087 %} 18088 18089 instruct vand16B(vecX dst, vecX src) %{ 18090 predicate(n->as_Vector()->length_in_bytes() == 16); 18091 match(Set dst (AndV dst src)); 18092 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 18093 ins_encode %{ 18094 __ pand($dst$$XMMRegister, $src$$XMMRegister); 18095 %} 18096 ins_pipe( pipe_slow ); 18097 %} 18098 18099 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 18100 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18101 match(Set dst (AndV src1 src2)); 18102 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 18103 ins_encode %{ 18104 int vector_len = 0; 18105 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18106 %} 18107 ins_pipe( pipe_slow ); 18108 %} 18109 18110 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 18111 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18112 match(Set dst (AndV src (LoadVector mem))); 18113 format %{ "vpand $dst,$src,$mem\t! 
and vectors (16 bytes)" %} 18114 ins_encode %{ 18115 int vector_len = 0; 18116 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18117 %} 18118 ins_pipe( pipe_slow ); 18119 %} 18120 18121 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 18122 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18123 match(Set dst (AndV src1 src2)); 18124 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 18125 ins_encode %{ 18126 int vector_len = 1; 18127 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18128 %} 18129 ins_pipe( pipe_slow ); 18130 %} 18131 18132 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 18133 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18134 match(Set dst (AndV src (LoadVector mem))); 18135 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 18136 ins_encode %{ 18137 int vector_len = 1; 18138 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18139 %} 18140 ins_pipe( pipe_slow ); 18141 %} 18142 18143 instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 18144 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18145 match(Set dst (AndV src1 src2)); 18146 format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %} 18147 ins_encode %{ 18148 int vector_len = 2; 18149 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18150 %} 18151 ins_pipe( pipe_slow ); 18152 %} 18153 18154 instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{ 18155 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18156 match(Set dst (AndV src (LoadVector mem))); 18157 format %{ "vpand $dst,$src,$mem\t! 
and vectors (64 bytes)" %} 18158 ins_encode %{ 18159 int vector_len = 2; 18160 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18161 %} 18162 ins_pipe( pipe_slow ); 18163 %} 18164 18165 // --------------------------------- OR --------------------------------------- 18166 18167 instruct vor4B(vecS dst, vecS src) %{ 18168 predicate(n->as_Vector()->length_in_bytes() == 4); 18169 match(Set dst (OrV dst src)); 18170 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 18171 ins_encode %{ 18172 __ por($dst$$XMMRegister, $src$$XMMRegister); 18173 %} 18174 ins_pipe( pipe_slow ); 18175 %} 18176 18177 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 18178 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18179 match(Set dst (OrV src1 src2)); 18180 format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %} 18181 ins_encode %{ 18182 int vector_len = 0; 18183 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18184 %} 18185 ins_pipe( pipe_slow ); 18186 %} 18187 18188 instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ 18189 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18190 match(Set dst (OrV src (LoadVector mem))); 18191 format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} 18192 ins_encode %{ 18193 int vector_len = 0; 18194 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18195 %} 18196 ins_pipe( pipe_slow ); 18197 %} 18198 18199 instruct vor8B(vecD dst, vecD src) %{ 18200 predicate(n->as_Vector()->length_in_bytes() == 8); 18201 match(Set dst (OrV dst src)); 18202 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 18203 ins_encode %{ 18204 __ por($dst$$XMMRegister, $src$$XMMRegister); 18205 %} 18206 ins_pipe( pipe_slow ); 18207 %} 18208 18209 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 18210 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18211 match(Set dst (OrV src1 src2)); 18212 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Vector OR, 8-byte vector with memory operand (AVX).
instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  // Fixed: predicate tested length_in_bytes() == 4 (copy-paste from
  // vor4B_mem); this is the vecD (8-byte) rule — compare vand8B_mem and
  // vxor8B_mem, which both test == 8.
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Vector OR, 16-byte vector, destructive two-operand SSE form.
instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Vector OR, 16-byte vector, non-destructive three-operand AVX form.
instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t!
or vectors (16 bytes)" %} 18256 ins_encode %{ 18257 int vector_len = 0; 18258 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18259 %} 18260 ins_pipe( pipe_slow ); 18261 %} 18262 18263 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 18264 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18265 match(Set dst (OrV src1 src2)); 18266 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 18267 ins_encode %{ 18268 int vector_len = 1; 18269 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18270 %} 18271 ins_pipe( pipe_slow ); 18272 %} 18273 18274 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 18275 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18276 match(Set dst (OrV src (LoadVector mem))); 18277 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 18278 ins_encode %{ 18279 int vector_len = 1; 18280 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18281 %} 18282 ins_pipe( pipe_slow ); 18283 %} 18284 18285 instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 18286 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18287 match(Set dst (OrV src1 src2)); 18288 format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %} 18289 ins_encode %{ 18290 int vector_len = 2; 18291 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18292 %} 18293 ins_pipe( pipe_slow ); 18294 %} 18295 18296 instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{ 18297 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18298 match(Set dst (OrV src (LoadVector mem))); 18299 format %{ "vpor $dst,$src,$mem\t! 
or vectors (64 bytes)" %} 18300 ins_encode %{ 18301 int vector_len = 2; 18302 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18303 %} 18304 ins_pipe( pipe_slow ); 18305 %} 18306 18307 // --------------------------------- XOR -------------------------------------- 18308 18309 instruct vxor4B(vecS dst, vecS src) %{ 18310 predicate(n->as_Vector()->length_in_bytes() == 4); 18311 match(Set dst (XorV dst src)); 18312 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 18313 ins_encode %{ 18314 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 18315 %} 18316 ins_pipe( pipe_slow ); 18317 %} 18318 18319 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 18320 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18321 match(Set dst (XorV src1 src2)); 18322 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %} 18323 ins_encode %{ 18324 int vector_len = 0; 18325 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18326 %} 18327 ins_pipe( pipe_slow ); 18328 %} 18329 18330 instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ 18331 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 18332 match(Set dst (XorV src (LoadVector mem))); 18333 format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} 18334 ins_encode %{ 18335 int vector_len = 0; 18336 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18337 %} 18338 ins_pipe( pipe_slow ); 18339 %} 18340 18341 instruct vxor8B(vecD dst, vecD src) %{ 18342 predicate(n->as_Vector()->length_in_bytes() == 8); 18343 match(Set dst (XorV dst src)); 18344 format %{ "pxor $dst,$src\t! 
xor vectors (8 bytes)" %} 18345 ins_encode %{ 18346 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 18347 %} 18348 ins_pipe( pipe_slow ); 18349 %} 18350 18351 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 18352 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18353 match(Set dst (XorV src1 src2)); 18354 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 18355 ins_encode %{ 18356 int vector_len = 0; 18357 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18358 %} 18359 ins_pipe( pipe_slow ); 18360 %} 18361 18362 instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ 18363 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 18364 match(Set dst (XorV src (LoadVector mem))); 18365 format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} 18366 ins_encode %{ 18367 int vector_len = 0; 18368 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18369 %} 18370 ins_pipe( pipe_slow ); 18371 %} 18372 18373 instruct vxor16B(vecX dst, vecX src) %{ 18374 predicate(n->as_Vector()->length_in_bytes() == 16); 18375 match(Set dst (XorV dst src)); 18376 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 18377 ins_encode %{ 18378 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 18379 %} 18380 ins_pipe( pipe_slow ); 18381 %} 18382 18383 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 18384 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18385 match(Set dst (XorV src1 src2)); 18386 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 18387 ins_encode %{ 18388 int vector_len = 0; 18389 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18390 %} 18391 ins_pipe( pipe_slow ); 18392 %} 18393 18394 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 18395 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 18396 match(Set dst (XorV src (LoadVector mem))); 18397 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (16 bytes)" %} 18398 ins_encode %{ 18399 int vector_len = 0; 18400 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18401 %} 18402 ins_pipe( pipe_slow ); 18403 %} 18404 18405 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 18406 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18407 match(Set dst (XorV src1 src2)); 18408 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 18409 ins_encode %{ 18410 int vector_len = 1; 18411 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18412 %} 18413 ins_pipe( pipe_slow ); 18414 %} 18415 18416 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 18417 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 18418 match(Set dst (XorV src (LoadVector mem))); 18419 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 18420 ins_encode %{ 18421 int vector_len = 1; 18422 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18423 %} 18424 ins_pipe( pipe_slow ); 18425 %} 18426 18427 instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{ 18428 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18429 match(Set dst (XorV src1 src2)); 18430 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %} 18431 ins_encode %{ 18432 int vector_len = 2; 18433 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 18434 %} 18435 ins_pipe( pipe_slow ); 18436 %} 18437 18438 instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{ 18439 predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64); 18440 match(Set dst (XorV src (LoadVector mem))); 18441 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (64 bytes)" %} 18442 ins_encode %{ 18443 int vector_len = 2; 18444 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 18445 %} 18446 ins_pipe( pipe_slow ); 18447 %} 18448 18449 instruct vcvt4Bto4S_reg(vecD dst, vecS src) %{ 18450 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18451 match(Set dst (VectorCastB2X src)); 18452 format %{ "vpmovsxbw $dst,$src\t! convert 4B to 4S vector" %} 18453 ins_encode %{ 18454 int vector_len = 0; 18455 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18456 %} 18457 ins_pipe( pipe_slow ); 18458 %} 18459 18460 instruct vcvt8Bto8S_reg(vecX dst, vecD src) %{ 18461 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18462 match(Set dst (VectorCastB2X src)); 18463 format %{ "vpmovsxbw $dst,$src\t! convert 8B to 8S vector" %} 18464 ins_encode %{ 18465 int vector_len = 0; 18466 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18467 %} 18468 ins_pipe( pipe_slow ); 18469 %} 18470 18471 instruct vcvt16Bto16S_reg(vecY dst, vecX src) %{ 18472 predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18473 match(Set dst (VectorCastB2X src)); 18474 format %{ "vpmovsxbw $dst,$src\t! convert 16B to 16S vector" %} 18475 ins_encode %{ 18476 int vector_len = 1; 18477 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18478 %} 18479 ins_pipe( pipe_slow ); 18480 %} 18481 18482 instruct vcvt32Bto32S_reg(vecZ dst, vecY src) %{ 18483 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 18484 match(Set dst (VectorCastB2X src)); 18485 format %{ "vpmovsxbw $dst,$src\t! 
convert 32B to 32S vector" %} 18486 ins_encode %{ 18487 int vector_len = 2; 18488 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18489 %} 18490 ins_pipe( pipe_slow ); 18491 %} 18492 18493 instruct vcvt4Bto4I_reg(vecX dst, vecS src) %{ 18494 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18495 match(Set dst (VectorCastB2X src)); 18496 format %{ "vpmovsxbd $dst,$src\t! convert 4B to 4I vector" %} 18497 ins_encode %{ 18498 int vector_len = 0; 18499 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18500 %} 18501 ins_pipe( pipe_slow ); 18502 %} 18503 18504 instruct vcvt8Bto8I_reg(vecY dst, vecD src) %{ 18505 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18506 match(Set dst (VectorCastB2X src)); 18507 format %{ "vpmovsxbd $dst,$src\t! convert 8B to 8I vector" %} 18508 ins_encode %{ 18509 int vector_len = 1; 18510 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18511 %} 18512 ins_pipe( pipe_slow ); 18513 %} 18514 18515 instruct vcvt16Bto16I_reg(vecZ dst, vecX src) %{ 18516 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18517 match(Set dst (VectorCastB2X src)); 18518 format %{ "vpmovsxbd $dst,$src\t! convert 16B to 16I vector" %} 18519 ins_encode %{ 18520 int vector_len = 2; 18521 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18522 %} 18523 ins_pipe( pipe_slow ); 18524 %} 18525 18526 instruct vcvt4Bto4L_reg(vecY dst, vecS src) %{ 18527 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18528 match(Set dst (VectorCastB2X src)); 18529 format %{ "vpmovsxbq $dst,$src\t! 
convert 4B to 4L vector" %} 18530 ins_encode %{ 18531 int vector_len = 1; 18532 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18533 %} 18534 ins_pipe( pipe_slow ); 18535 %} 18536 18537 instruct vcvt8Bto8L_reg(vecZ dst, vecD src) %{ 18538 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18539 match(Set dst (VectorCastB2X src)); 18540 format %{ "vpmovsxbq $dst,$src\t! convert 8B to 8L vector" %} 18541 ins_encode %{ 18542 int vector_len = 2; 18543 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18544 %} 18545 ins_pipe( pipe_slow ); 18546 %} 18547 18548 instruct vcvt4Bto4F_reg(vecX dst, vecS src) %{ 18549 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18550 match(Set dst (VectorCastB2X src)); 18551 format %{ "vpmovsxbd $dst,$src\n\t" 18552 "vcvtdq2ps $dst,$dst\t! convert 4B to 4F vector" %} 18553 ins_encode %{ 18554 int vector_len = 0; 18555 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18556 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18557 %} 18558 ins_pipe( pipe_slow ); 18559 %} 18560 18561 instruct vcvt8Bto8F_reg(vecY dst, vecD src) %{ 18562 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18563 match(Set dst (VectorCastB2X src)); 18564 format %{ "vpmovsxbd $dst,$src\n\t" 18565 "vcvtdq2ps $dst,$dst\t! 
convert 8B to 8F vector" %} 18566 ins_encode %{ 18567 int vector_len = 1; 18568 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18569 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18570 %} 18571 ins_pipe( pipe_slow ); 18572 %} 18573 18574 instruct vcvt16Bto16F_reg(vecZ dst, vecX src) %{ 18575 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18576 match(Set dst (VectorCastB2X src)); 18577 format %{ "vpmovsxbd $dst,$src\n\t" 18578 "vcvtdq2ps $dst,$dst\t! convert 16B to 16F vector" %} 18579 ins_encode %{ 18580 int vector_len = 2; 18581 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18582 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18583 %} 18584 ins_pipe( pipe_slow ); 18585 %} 18586 18587 instruct vcvt4Bto4D_reg(vecY dst, vecS src) %{ 18588 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 18589 match(Set dst (VectorCastB2X src)); 18590 format %{ "vpmovsxbd $dst,$src\n\t" 18591 "vcvtdq2pd $dst,$dst\t! convert 4B to 4D vector" %} 18592 ins_encode %{ 18593 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, 0); 18594 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, 1); 18595 %} 18596 ins_pipe( pipe_slow ); 18597 %} 18598 18599 instruct vcvt8Bto8D_reg(vecZ dst, vecD src) %{ 18600 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 18601 match(Set dst (VectorCastB2X src)); 18602 format %{ "vpmovsxbd $dst,$src\n\t" 18603 "vcvtdq2pd $dst,$dst\t! 
convert 8B to 8D vector" %} 18604 ins_encode %{ 18605 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, 1); 18606 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, 2); 18607 %} 18608 ins_pipe( pipe_slow ); 18609 %} 18610 18611 instruct vcvt4Sto4B_reg(vecS dst, vecD src, rRegL scratch) %{ 18612 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18613 effect(TEMP scratch); 18614 match(Set dst (VectorCastS2X src)); 18615 format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" 18616 "vpackuswb $dst,$dst\t! convert 4S to 4B vector" %} 18617 ins_encode %{ 18618 int vector_len = 0; 18619 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 18620 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18621 %} 18622 ins_pipe( pipe_slow ); 18623 %} 18624 18625 instruct vcvt8Sto8B_reg(vecD dst, vecX src, rRegL scratch) %{ 18626 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18627 effect(TEMP scratch); 18628 match(Set dst (VectorCastS2X src)); 18629 format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" 18630 "vpackuswb $dst,$dst\t! 
convert 8S to 8B vector" %} 18631 ins_encode %{ 18632 int vector_len = 0; 18633 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 18634 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18635 %} 18636 ins_pipe( pipe_slow ); 18637 %} 18638 18639 instruct vcvt16Sto16B_reg(vecX dst, vecY src, vecY tmp, rRegL scratch) %{ 18640 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18641 effect(TEMP scratch, TEMP tmp); 18642 match(Set dst (VectorCastS2X src)); 18643 format %{ "vpand $dst,$src,[0x00FF00FF00FF00FF]\n\t" 18644 "vextracti128 $tmp,$dst,0x1\n\t" 18645 "vpackuswb $dst,$dst,$tmp\t! convert 16S to 16B vector" %} 18646 ins_encode %{ 18647 int vector_len = 1; 18648 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register); 18649 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 18650 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 18651 %} 18652 ins_pipe( pipe_slow ); 18653 %} 18654 18655 instruct vcvt32Sto32B_reg(vecY dst, vecZ src) %{ 18656 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 18657 match(Set dst (VectorCastS2X src)); 18658 format %{ "evpmovwb $dst,$src\t! convert 32S to 32B vector" %} 18659 ins_encode %{ 18660 int vector_len = 2; 18661 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18662 %} 18663 ins_pipe( pipe_slow ); 18664 %} 18665 18666 instruct vcvt2Sto2I_reg(vecD dst, vecS src) %{ 18667 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18668 match(Set dst (VectorCastS2X src)); 18669 format %{ "vpmovsxwd $dst,$src\t! 
convert 2S to 2I vector" %} 18670 ins_encode %{ 18671 int vector_len = 0; 18672 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18673 %} 18674 ins_pipe( pipe_slow ); 18675 %} 18676 18677 instruct vcvt4Sto4I_reg(vecX dst, vecD src) %{ 18678 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18679 match(Set dst (VectorCastS2X src)); 18680 format %{ "vpmovsxwd $dst,$src\t! convert 4S to 4I vector" %} 18681 ins_encode %{ 18682 int vector_len = 0; 18683 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18684 %} 18685 ins_pipe( pipe_slow ); 18686 %} 18687 18688 instruct vcvt8Sto8I_reg(vecY dst, vecX src) %{ 18689 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18690 match(Set dst (VectorCastS2X src)); 18691 format %{ "vpmovsxwd $dst,$src\t! convert 8S to 8I vector" %} 18692 ins_encode %{ 18693 int vector_len = 1; 18694 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18695 %} 18696 ins_pipe( pipe_slow ); 18697 %} 18698 18699 instruct vcvt16Sto16I_reg(vecZ dst, vecY src) %{ 18700 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 18701 match(Set dst (VectorCastS2X src)); 18702 format %{ "vpmovsxwd $dst,$src\t! convert 16S to 16I vector" %} 18703 ins_encode %{ 18704 int vector_len = 2; 18705 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18706 %} 18707 ins_pipe( pipe_slow ); 18708 %} 18709 18710 instruct vcvt2Sto2L_reg(vecX dst, vecS src) %{ 18711 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18712 match(Set dst (VectorCastS2X src)); 18713 format %{ "vpmovsxwq $dst,$src\t! 
convert 2S to 2L vector" %} 18714 ins_encode %{ 18715 int vector_len = 0; 18716 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18717 %} 18718 ins_pipe( pipe_slow ); 18719 %} 18720 18721 instruct vcvt4Sto4L_reg(vecY dst, vecD src) %{ 18722 predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18723 match(Set dst (VectorCastS2X src)); 18724 format %{ "vpmovsxwq $dst,$src\t! convert 4S to 4L vector" %} 18725 ins_encode %{ 18726 int vector_len = 1; 18727 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18728 %} 18729 ins_pipe( pipe_slow ); 18730 %} 18731 18732 instruct vcvt8Sto8L_reg(vecZ dst, vecX src) %{ 18733 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 18734 match(Set dst (VectorCastS2X src)); 18735 format %{ "vpmovsxwq $dst,$src\t! convert 8S to 8L vector" %} 18736 ins_encode %{ 18737 int vector_len = 2; 18738 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18739 %} 18740 ins_pipe( pipe_slow ); 18741 %} 18742 18743 instruct vcvt2Sto2F_reg(vecD dst, vecS src) %{ 18744 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18745 match(Set dst (VectorCastS2X src)); 18746 format %{ "vpmovsxwd $dst,$src\n\t" 18747 "vcvtdq2ps $dst,$dst\t! convert 2S to 2F vector" %} 18748 ins_encode %{ 18749 int vector_len = 0; 18750 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18751 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18752 %} 18753 ins_pipe( pipe_slow ); 18754 %} 18755 18756 instruct vcvt4Sto4F_reg(vecX dst, vecD src) %{ 18757 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18758 match(Set dst (VectorCastS2X src)); 18759 format %{ "vpmovsxwd $dst,$src\n\t" 18760 "vcvtdq2ps $dst,$dst\t! 
convert 4S to 4F vector" %} 18761 ins_encode %{ 18762 int vector_len = 0; 18763 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18764 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18765 %} 18766 ins_pipe( pipe_slow ); 18767 %} 18768 18769 instruct vcvt8Sto8F_reg(vecY dst, vecX src) %{ 18770 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18771 match(Set dst (VectorCastS2X src)); 18772 format %{ "vpmovsxwd $dst,$src\n\t" 18773 "vcvtdq2ps $dst,$dst\t! convert 8S to 8F vector" %} 18774 ins_encode %{ 18775 int vector_len = 1; 18776 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18777 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18778 %} 18779 ins_pipe( pipe_slow ); 18780 %} 18781 18782 instruct vcvt16Sto16F_reg(vecZ dst, vecY src) %{ 18783 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 18784 match(Set dst (VectorCastS2X src)); 18785 format %{ "vpmovsxwd $dst,$src\n\t" 18786 "vcvtdq2ps $dst,$dst\t! convert 16S to 16F vector" %} 18787 ins_encode %{ 18788 int vector_len = 2; 18789 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 18790 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 18791 %} 18792 ins_pipe( pipe_slow ); 18793 %} 18794 18795 instruct vcvt2Sto2D_reg(vecX dst, vecS src) %{ 18796 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 18797 match(Set dst (VectorCastS2X src)); 18798 format %{ "vpmovsxwd $dst,$src\n\t" 18799 "vcvtdq2pd $dst,$dst\t! 
convert 2S to 2D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastS2X: short -> double casts.
// Pattern: sign-extend 16-bit lanes to 32-bit (vpmovsxwd), then convert the
// resulting ints to doubles (vcvtdq2pd). vector_len selects the encode width
// (0 = 128-bit, 1 = 256-bit, 2 = 512-bit EVEX).
// ---------------------------------------------------------------------------

// 4 shorts (vecD) -> 4 doubles (vecY); requires AVX2 for the 256-bit forms.
instruct vcvt4Sto4D_reg(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2pd $dst,$dst\t! convert 4S to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8 shorts (vecX) -> 8 doubles (vecZ); 512-bit EVEX, requires AVX-512.
instruct vcvt8Sto8D_reg(vecZ dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastS2X src));
  format %{ "vpmovsxwd $dst,$src\n\t"
            "vcvtdq2pd $dst,$dst\t! convert 8S to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastI2X: int -> byte casts (truncating narrow).
// Pattern: mask each int lane down to its low byte (vpand with a memory
// constant, needing a scratch GPR for the address materialization), then
// narrow 32->16 (vpackusdw) and 16->8 (vpackuswb). The unsigned-saturating
// packs are safe here because the mask already cleared the high bits.
// ---------------------------------------------------------------------------

instruct vcvt4Ito4B_reg(vecS dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP scratch);
  match(Set dst (VectorCastI2X src));
  format %{ "vpand $dst,$src,[0x000000FF000000FF]\n\t"
            "vpackusdw $dst,$dst\n\t"
            "vpackuswb $dst,$dst\t! convert 4I to 4B vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8I -> 8B: the 256-bit source is split with vextracti128 so the two halves
// can be packed together; the final pack runs at 128-bit width (last arg 0).
// NOTE(review): vpackusdw here still uses vector_len (== 1) even though, after
// the extract, both operands fit in 128 bits — confirm this lane behavior is
// intended (compare with vcvt4Lto4B_reg below, which drops to vector_len 0).
instruct vcvt8Ito8B_reg(vecD dst, vecY src, vecY tmp, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  effect(TEMP scratch, TEMP tmp);
  match(Set dst (VectorCastI2X src));
  format %{ "vpand $dst,$src,[0x000000FF000000FF]\n\t"
            "vextracti128 $tmp,$dst,0x1\n\t"
            "vpackusdw $dst,$dst,$tmp\n\t"
            "vpackuswb $dst,$dst\t! convert 8I to 8B vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

// 16I -> 16B: AVX-512 has a dedicated truncating move (evpmovdb).
instruct vcvt16Ito16B_reg(vecX dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastI2X src));
  format %{ "evpmovdb $dst,$src\t! convert 16I to 16B vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastI2X: int -> short casts. Same mask-then-pack idea as I2B, but a
// single vpackusdw (32->16) suffices.
// ---------------------------------------------------------------------------

instruct vcvt2Ito2S_reg(vecS dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP scratch);
  match(Set dst (VectorCastI2X src));
  format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t"
            "vpackusdw $dst,$dst\t! convert 2I to 2S vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Ito4S_reg(vecD dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP scratch);
  match(Set dst (VectorCastI2X src));
  format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t"
            "vpackusdw $dst,$dst\t! convert 4I to 4S vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 8I -> 8S: split the 256-bit value and pack the two 128-bit halves together.
instruct vcvt8Ito8S_reg(vecX dst, vecY src, vecY tmp, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP scratch, TEMP tmp);
  match(Set dst (VectorCastI2X src));
  format %{ "vpand $dst,$src,[0x0000FFFF0000FFFF]\n\t"
            "vextracti128 $tmp,$dst,0x1\n\t"
            "vpackusdw $dst,$dst,$tmp\t! convert 8I to 8S vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register);
    __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 16I -> 16S: dedicated AVX-512 truncating move.
instruct vcvt16Ito16S_reg(vecY dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastI2X src));
  format %{ "evpmovdw $dst,$src\t! convert 16I to 16S vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastI2X: int -> long casts (sign-extending widen via vpmovsxdq).
// ---------------------------------------------------------------------------

instruct vcvt2Ito2L_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastI2X src));
  format %{ "vpmovsxdq $dst,$src\t! convert 2I to 2L vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Ito4L_reg(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastI2X src));
  format %{ "vpmovsxdq $dst,$src\t! convert 4I to 4L vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): predicate admits UseAVX == 2 but the encode is 512-bit EVEX
// (vector_len 2) and dst is vecZ — confirm whether UseAVX > 2 was intended,
// or whether register-mask constraints already prevent vecZ without AVX-512.
instruct vcvt8Ito8L_reg(vecZ dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorCastI2X src));
  format %{ "vpmovsxdq $dst,$src\t! convert 8I to 8L vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastI2X: int -> float casts (vcvtdq2ps, lane-for-lane).
// ---------------------------------------------------------------------------

instruct vcvt2Ito2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2ps $dst,$src\t! convert 2I to 2F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Ito4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2ps $dst,$src\t! convert 4I to 4F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Ito8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2ps $dst,$src\t! convert 8I to 8F vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): a 16-lane float result needs 512 bits, but dst/src are
// declared vecY (256-bit) while the encode uses vector_len 2 — this looks
// like it should be (vecZ dst, vecZ src); confirm against the operand
// definitions and the sibling vcvt16Ito16S_reg/vcvt16Ito16B_reg rules.
instruct vcvt16Ito16F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2ps $dst,$src\t! convert 16I to 16F vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastI2X: int -> double casts (vcvtdq2pd, result twice as wide).
// ---------------------------------------------------------------------------

instruct vcvt2Ito2D_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2pd $dst,$src\t! convert 2I to 2D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Ito4D_reg(vecY dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2pd $dst,$src\t! convert 4I to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): as with vcvt8Ito8L_reg, predicate is UseAVX >= 2 but the
// encode is 512-bit (vector_len 2, vecZ dst) — confirm the intended minimum.
instruct vcvt8Ito8D_reg(vecZ dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtdq2pd $dst,$src\t! convert 8I to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastL2X: long -> byte/short/int narrowing casts.
// Without AVX-512, a long->int narrow is done by permuting the even 32-bit
// words of each 64-bit lane into the low half (vpermilps selector 8 within
// lanes, then vpermpd selector 8 across lanes); further narrowing reuses the
// mask-and-pack sequence at 128-bit width. With AVX-512 a single
// evpmovq{b,w,d} truncating move is used instead.
// ---------------------------------------------------------------------------

instruct vcvt4Lto4B_reg(vecS dst, vecY src, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastL2X src));
  effect(TEMP scratch);
  format %{ "vpermilps $dst,$src,8\n\t"
            "vpermpd $dst,$dst,8\n\t"
            "vpand $dst,$dst,[0x000000FF000000FF]\n\t"
            "vpackusdw $dst,$dst\n\t"
            "vpackuswb $dst,$dst\t! convert 4L to 4B vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len);
    __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len);
    // Since cast to int has been done, do rest of operations in 128.
    vector_len = 0;
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Lto8B_reg(vecD dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorCastL2X src));
  format %{ "evpmovqb $dst,$src\t! convert 8L to 8B vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 2L -> 2S: selector 8 moves the even words of both 64-bit lanes to the low
// half (128-bit, so vpshufd alone suffices), then mask-and-pack to shorts.
instruct vcvt2Lto2S_reg(vecS dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastL2X src));
  effect(TEMP scratch);
  format %{ "vpshufd $dst,$src,8\n\t"
            "vpand $dst,$dst,[0x0000FFFF0000FFFF]\n\t"
            "vpackusdw $dst,$dst\t! convert 2L to 2S vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Lto4S_reg(vecD dst, vecY src, rRegL scratch) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastL2X src));
  effect(TEMP scratch);
  format %{ "vpermilps $dst,$src,8\n\t"
            "vpermpd $dst,$dst,8\n\t"
            "vpand $dst,$dst,[0x0000FFFF0000FFFF]\n\t"
            "vpackusdw $dst,$dst\t! convert 4L to 4S vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len);
    __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len);
    // Since cast to int has been done, do rest of operations in 128.
    vector_len = 0;
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vector_len, $scratch$$Register);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Lto8S_reg(vecX dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorCastL2X src));
  format %{ "evpmovqw $dst,$src\t! convert 8L to 8S vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 1L -> 1I: the int result is just the low 32 bits already sitting in the
// register, so only a register-to-register move (if any) is needed.
instruct vcvt1Lto1I_reg(vecS dst, vecD src) %{
  predicate(n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastL2X src));
  format %{ "movdqu $dst,$src\t! convert 1L to 1I vector" %}
  ins_encode %{
    // If register is the same, then move is not needed.
    if ($dst$$XMMRegister != $src$$XMMRegister) {
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// 2L -> 2I, pre-AVX (SSE) form.
instruct vcvt2Lto2I_reg(vecD dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastL2X src));
  format %{ "pshufd $dst,$src,8\t! convert 2L to 2I vector" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
  %}
  ins_pipe( pipe_slow );
%}

// 2L -> 2I, VEX-encoded form for AVX targets.
instruct vcvt2Lto2I_reg_avx(vecD dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastL2X src));
  format %{ "vpshufd $dst,$src,8\t! convert 2L to 2I vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Lto4I_reg(vecX dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastL2X src));
  format %{ "vpermilps $dst,$src,8\n\t"
            "vpermpd $dst,$dst,8\t! convert 4L to 4I vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vector_len);
    __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Lto8I_reg(vecY dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorCastL2X src));
  format %{ "evpmovqd $dst,$src\t! convert 8L to 8I vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastL2X: long -> float/double casts. These rely on the AVX-512DQ
// vcvtqq2ps/vcvtqq2pd instructions (hence the supports_avx512dq() guard);
// the format strings show the mnemonic, the encode calls the ev* wrappers.
// ---------------------------------------------------------------------------

instruct vcvt2Lto2F_reg(vecD dst, vecX src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtqq2ps $dst,$src\t! convert 2L to 2F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Lto4F_reg(vecX dst, vecY src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtqq2ps $dst,$src\t! convert 4L to 4F vector" %}
  ins_encode %{
    int vector_len = 1;
    __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Lto8F_reg(vecY dst, vecZ src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtqq2ps $dst,$src\t! convert 8L to 8F vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt1Lto1D_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtqq2pd $dst,$src\t! convert 1L to 1D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Lto2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtqq2pd $dst,$src\t! convert 2L to 2D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Lto4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtqq2pd $dst,$src\t! convert 4L to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Lto8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512dq() && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtqq2pd $dst,$src\t! convert 8L to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorCastF2X / VectorCastD2X: float <-> double casts, single-instruction
// packed conversions (vcvtps2pd widens, vcvtpd2ps narrows).
// ---------------------------------------------------------------------------

instruct vcvt2Fto2D_reg(vecX dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vcvtps2pd $dst,$src\t! convert 2F to 2D vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Fto4D_reg(vecY dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vcvtps2pd $dst,$src\t! convert 4F to 4D vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Fto8D_reg(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "evcvtps2pd $dst,$src\t! convert 8F to 8D vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt2Dto2F_reg(vecD dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vcvtpd2ps $dst,$src\t! convert 2D to 2F vector" %}
  ins_encode %{
    int vector_len = 0;
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt4Dto4F_reg(vecX dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vcvtpd2ps $dst,$src\t! convert 4D to 4F vector" %}
  ins_encode %{
    int vector_len = 1;
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcvt8Dto8F_reg(vecY dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "evcvtpd2ps $dst,$src\t! convert 8D to 8F vector" %}
  ins_encode %{
    int vector_len = 2;
    __ evcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------------------------------------------
// VectorMaskCmp, float element type. The 2/4/8-lane AVX forms use vcmpps,
// which writes an all-ones/all-zeros mask directly into dst. The 16-lane
// AVX-512 forms compare into an opmask register and then expand it to a
// vector mask by doing a zero-masked load of an all-bits-set constant
// (evmovdqul with the opmask). k2 is hardcoded as the temporary because
// k registers are not allocated by the register allocator here; k0 passed
// as the instruction mask means the compare itself is unmasked.
// All predicates here are the ordered, non-signaling (_OQ) variants, so a
// NaN operand makes eq/lt/gt/ge/le compares yield false.
// ---------------------------------------------------------------------------

instruct vcmpeq2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqps $dst,$src1,$src2\t! cmpeq packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// TEMP dst: dst is written before src1/src2 are fully dead (mask expansion),
// so it must not be assigned the same register as an input.
instruct vcmpeq16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpeqps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltps $dst,$src1,$src2\t! cmplt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpltps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtps $dst,$src1,$src2\t! cmpgt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgtps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgeps $dst,$src1,$src2\t! cmpge packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgeps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed2F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed4F" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8F(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpleps $dst,$src1,$src2\t! cmple packed8F" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpleps k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16F" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; //ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ne compares (unlike the ordered compares above) must be true for NaN
// operands, hence the unordered predicate noted below.
instruct vcmpne2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed2F" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
19639 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 19640 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19641 %} 19642 ins_pipe( pipe_slow ); 19643 %} 19644 19645 instruct vcmpne4F(vecX dst, vecX src1, vecX src2) %{ 19646 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 19647 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 19648 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19649 match(Set dst (VectorMaskCmp src1 src2)); 19650 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed4F" %} 19651 ins_encode %{ 19652 int vector_len = 0; 19653 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 19654 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 19655 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19656 %} 19657 ins_pipe( pipe_slow ); 19658 %} 19659 19660 instruct vcmpne8F(vecY dst, vecY src1, vecY src2) %{ 19661 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 19662 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 19663 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19664 match(Set dst (VectorMaskCmp src1 src2)); 19665 format %{ "vcmpneps $dst,$src1,$src2\t! cmpne packed8F" %} 19666 ins_encode %{ 19667 int vector_len = 1; 19668 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
19669 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 19670 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19671 %} 19672 ins_pipe( pipe_slow ); 19673 %} 19674 19675 instruct vcmpne16F(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 19676 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 19677 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 19678 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 19679 match(Set dst (VectorMaskCmp src1 src2)); 19680 effect(TEMP dst, TEMP scratch); 19681 format %{ "vcmpneps k2,$src1,$src2\n\t" 19682 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed16F" %} 19683 ins_encode %{ 19684 int vector_len = 2; 19685 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 19686 Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; //unordered non-signaling 19687 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 19688 KRegister mask = k0; // The comparison itself is not being masked. 19689 __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 19690 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 19691 %} 19692 ins_pipe( pipe_slow ); 19693 %} 19694 19695 instruct vcmpeq1D(vecD dst, vecD src1, vecD src2) %{ 19696 predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && 19697 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 19698 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 19699 match(Set dst (VectorMaskCmp src1 src2)); 19700 format %{ "vcmpeqpd $dst,$src1,$src2\t! 
cmpeq packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpeqpd $dst,$src1,$src2\t! cmpeq packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX form: compare into k2, then expand the k-mask to all-ones/all-zeros
// lanes with a zero-masked 64-bit move (evmovdquq, hence "vmovdqu64").
instruct vcmpeq8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpeqpd k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::EQ_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpltpd $dst,$src1,$src2\t! cmplt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpltpd k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgtpd $dst,$src1,$src2\t! cmpgt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgtpd k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GT_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpgepd $dst,$src1,$src2\t! cmpge packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpgepd k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::GE_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed1D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed2D" %}
  ins_encode %{
    int vector_len = 0;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmplepd $dst,$src1,$src2\t! cmple packed4D" %}
  ins_encode %{
    int vector_len = 1;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmplepd k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8D" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicateFP cmp = Assembler::LE_OQ; // ordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Packed-double != compare: unordered predicate (NEQ_UQ) so NaN != x is true,
// as required by JLS 15.21.1.
instruct vcmpne1D(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed1D" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed2D" %}
  ins_encode %{
    int vector_len = 0;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4D(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vcmpnepd $dst,$src1,$src2\t! cmpne packed4D" %}
  ins_encode %{
    int vector_len = 1;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8D(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vcmpnepd k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpne packed8D" %}
  ins_encode %{
    int vector_len = 2;
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    Assembler::ComparisonPredicateFP cmp = Assembler::NEQ_UQ; // unordered non-signaling
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Packed-int compares: AVX only provides pcmpeq/pcmpgt directly; the other
// predicates are derived by swapping operands and/or inverting the result.
instruct vcmpeq2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\t! 
cmpeq packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\t! cmpeq packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqd $dst,$src1,$src2\t! cmpeq packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX form: compare into k2, then expand the k-mask into all-ones/all-zeros
// lanes with a zero-masked 32-bit move from an all-bits-set constant.
instruct vcmpeq16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// lt is synthesized as gt with the operands swapped: src1 < src2 == src2 > src1.
instruct vcmplt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src2,$src1\t! cmplt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpltd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2I(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4I(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8I(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtd $dst,$src1,$src2\t! cmpgt packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnled k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle; // not-less-equal == gt
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ge is synthesized as NOT(src2 > src1): compare swapped, then flip every
// bit with vpxor against an all-bits-set constant (needs scratch for the
// constant's address).
instruct vcmpge2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtd($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltd k2,$src1,$src2\n\t"
            "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed16I" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt; // not-less-than == ge
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// le is synthesized as NOT(src1 > src2).
instruct vcmple2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtd $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t!
cmple packed4I" %} 20330 ins_encode %{ 20331 int vector_len = 0; 20332 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20333 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20334 %} 20335 ins_pipe( pipe_slow ); 20336 %} 20337 20338 instruct vcmple8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 20339 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 20340 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 20341 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20342 match(Set dst (VectorMaskCmp src1 src2)); 20343 effect(TEMP scratch); 20344 format %{ "vpcmpgtd $dst,$src1,$src2\n\t" 20345 "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8I" %} 20346 ins_encode %{ 20347 int vector_len = 1; 20348 __ vpcmpgtd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20349 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20350 %} 20351 ins_pipe( pipe_slow ); 20352 %} 20353 20354 instruct vcmple16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20355 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 20356 n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && 20357 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20358 match(Set dst (VectorMaskCmp src1 src2)); 20359 effect(TEMP dst, TEMP scratch); 20360 format %{ "vpcmpled k2,$src1,$src2\n\t" 20361 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed16I" %} 20362 ins_encode %{ 20363 int vector_len = 2; 20364 Assembler::ComparisonPredicate cmp = Assembler::le; 20365 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20366 KRegister mask = k0; // The comparison itself is not being masked. 
20367 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20368 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20369 %} 20370 ins_pipe( pipe_slow ); 20371 %} 20372 20373 instruct vcmpne2I(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 20374 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && 20375 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 20376 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20377 match(Set dst (VectorMaskCmp src1 src2)); 20378 effect(TEMP scratch); 20379 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 20380 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2I" %} 20381 ins_encode %{ 20382 int vector_len = 0; 20383 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20384 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20385 %} 20386 ins_pipe( pipe_slow ); 20387 %} 20388 20389 instruct vcmpne4I(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 20390 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && 20391 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 20392 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20393 match(Set dst (VectorMaskCmp src1 src2)); 20394 effect(TEMP scratch); 20395 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 20396 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpneq packed4I" %} 20397 ins_encode %{ 20398 int vector_len = 0; 20399 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20400 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20401 %} 20402 ins_pipe( pipe_slow ); 20403 %} 20404 20405 instruct vcmpne8I(vecY dst, vecY src1, vecY src2, rRegL scratch) %{ 20406 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && 20407 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 20408 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20409 match(Set dst (VectorMaskCmp src1 src2)); 20410 effect(TEMP scratch); 20411 format %{ "vpcmpeqd $dst,$src1,$src2\n\t" 20412 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8I" %} 20413 ins_encode %{ 20414 int vector_len = 1; 20415 __ vpcmpeqd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20416 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20417 %} 20418 ins_pipe( pipe_slow ); 20419 %} 20420 20421 instruct vcmpne16I(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20422 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && 20423 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && 20424 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); 20425 match(Set dst (VectorMaskCmp src1 src2)); 20426 effect(TEMP dst, TEMP scratch); 20427 format %{ "vpcmpneqd k2,$src1,$src2\n\t" 20428 "vmovdqu32 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed16I" %} 20429 ins_encode %{ 20430 int vector_len = 2; 20431 Assembler::ComparisonPredicate cmp = Assembler::neq; 20432 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20433 KRegister mask = k0; // The comparison itself is not being masked. 
20434 __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20435 __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20436 %} 20437 ins_pipe( pipe_slow ); 20438 %} 20439 20440 instruct vcmpeq8B(vecD dst, vecD src1, vecD src2) %{ 20441 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 20442 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 20443 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20444 match(Set dst (VectorMaskCmp src1 src2)); 20445 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed8B" %} 20446 ins_encode %{ 20447 int vector_len = 0; 20448 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20449 %} 20450 ins_pipe( pipe_slow ); 20451 %} 20452 20453 instruct vcmpeq16B(vecX dst, vecX src1, vecX src2) %{ 20454 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 20455 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 20456 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20457 match(Set dst (VectorMaskCmp src1 src2)); 20458 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! cmpeq packed16B" %} 20459 ins_encode %{ 20460 int vector_len = 0; 20461 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20462 %} 20463 ins_pipe( pipe_slow ); 20464 %} 20465 20466 instruct vcmpeq32B(vecY dst, vecY src1, vecY src2) %{ 20467 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 20468 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 20469 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20470 match(Set dst (VectorMaskCmp src1 src2)); 20471 format %{ "vpcmpeqb $dst,$src1,$src2\n\t! 
cmpeq packed32B" %} 20472 ins_encode %{ 20473 int vector_len = 1; 20474 __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20475 %} 20476 ins_pipe( pipe_slow ); 20477 %} 20478 20479 instruct vcmpeq64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20480 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 20481 n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && 20482 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20483 match(Set dst (VectorMaskCmp src1 src2)); 20484 effect(TEMP dst, TEMP scratch); 20485 format %{ "vpcmpeqb k2,$src1,$src2\n\t" 20486 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed64B" %} 20487 ins_encode %{ 20488 int vector_len = 2; 20489 Assembler::ComparisonPredicate cmp = Assembler::eq; 20490 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20491 KRegister mask = k0; // The comparison itself is not being masked. 20492 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20493 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20494 %} 20495 ins_pipe( pipe_slow ); 20496 %} 20497 20498 instruct vcmplt8B(vecD dst, vecD src1, vecD src2) %{ 20499 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 20500 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 20501 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20502 match(Set dst (VectorMaskCmp src1 src2)); 20503 format %{ "vpcmpgtb $dst,$src2,$src1\t! 
cmplt packed8B" %} 20504 ins_encode %{ 20505 int vector_len = 0; 20506 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20507 %} 20508 ins_pipe( pipe_slow ); 20509 %} 20510 20511 instruct vcmplt16B(vecX dst, vecX src1, vecX src2) %{ 20512 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 20513 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 20514 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20515 match(Set dst (VectorMaskCmp src1 src2)); 20516 format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed16B" %} 20517 ins_encode %{ 20518 int vector_len = 0; 20519 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20520 %} 20521 ins_pipe( pipe_slow ); 20522 %} 20523 20524 instruct vcmplt32B(vecY dst, vecY src1, vecY src2) %{ 20525 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 20526 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 20527 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20528 match(Set dst (VectorMaskCmp src1 src2)); 20529 format %{ "vpcmpgtb $dst,$src2,$src1\t! cmplt packed32B" %} 20530 ins_encode %{ 20531 int vector_len = 1; 20532 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20533 %} 20534 ins_pipe( pipe_slow ); 20535 %} 20536 20537 instruct vcmplt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20538 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 20539 n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && 20540 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20541 match(Set dst (VectorMaskCmp src1 src2)); 20542 effect(TEMP dst, TEMP scratch); 20543 format %{ "vpcmpnleb k2,$src1,$src2\n\t" 20544 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! 
cmpgt packed64B" %} 20545 ins_encode %{ 20546 int vector_len = 2; 20547 Assembler::ComparisonPredicate cmp = Assembler::lt; 20548 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20549 KRegister mask = k0; // The comparison itself is not being masked. 20550 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20551 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20552 %} 20553 ins_pipe( pipe_slow ); 20554 %} 20555 20556 instruct vcmpgt8B(vecD dst, vecD src1, vecD src2) %{ 20557 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 20558 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 20559 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20560 match(Set dst (VectorMaskCmp src1 src2)); 20561 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed8B" %} 20562 ins_encode %{ 20563 int vector_len = 0; 20564 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20565 %} 20566 ins_pipe( pipe_slow ); 20567 %} 20568 20569 instruct vcmpgt16B(vecX dst, vecX src1, vecX src2) %{ 20570 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 20571 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 20572 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20573 match(Set dst (VectorMaskCmp src1 src2)); 20574 format %{ "vpcmpgtb $dst,$src1,$src2\t! 
cmpgt packed16B" %} 20575 ins_encode %{ 20576 int vector_len = 0; 20577 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20578 %} 20579 ins_pipe( pipe_slow ); 20580 %} 20581 20582 instruct vcmpgt32B(vecY dst, vecY src1, vecY src2) %{ 20583 predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && 20584 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 20585 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20586 match(Set dst (VectorMaskCmp src1 src2)); 20587 format %{ "vpcmpgtb $dst,$src1,$src2\t! cmpgt packed32B" %} 20588 ins_encode %{ 20589 int vector_len = 1; 20590 __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 20591 %} 20592 ins_pipe( pipe_slow ); 20593 %} 20594 20595 instruct vcmpgt64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{ 20596 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && 20597 n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && 20598 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20599 match(Set dst (VectorMaskCmp src1 src2)); 20600 effect(TEMP dst, TEMP scratch); 20601 format %{ "vpcmpnleb k2,$src1,$src2\n\t" 20602 "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed64B" %} 20603 ins_encode %{ 20604 int vector_len = 2; 20605 Assembler::ComparisonPredicate cmp = Assembler::nle; 20606 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 20607 KRegister mask = k0; // The comparison itself is not being masked. 
20608 __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len); 20609 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register); 20610 %} 20611 ins_pipe( pipe_slow ); 20612 %} 20613 20614 instruct vcmpge8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{ 20615 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && 20616 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 20617 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20618 match(Set dst (VectorMaskCmp src1 src2)); 20619 effect(TEMP scratch); 20620 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 20621 "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8B" %} 20622 ins_encode %{ 20623 int vector_len = 0; 20624 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20625 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20626 %} 20627 ins_pipe( pipe_slow ); 20628 %} 20629 20630 instruct vcmpge16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{ 20631 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && 20632 n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && 20633 n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 20634 match(Set dst (VectorMaskCmp src1 src2)); 20635 effect(TEMP scratch); 20636 format %{ "vpcmpgtb $dst,$src2,$src1\n\t" 20637 "vpxor $dst,$dst,0xFFFFFFFF\t! 
cmpge packed16B" %} 20638 ins_encode %{ 20639 int vector_len = 0; 20640 __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len); 20641 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 20642 %} 20643 ins_pipe( pipe_slow ); 20644 %} 20645 20646 instruct extract8d(regD dst, vecZ src, vecZ tmp, immI idx) %{ 20647 predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 8); 20648 match(Set dst (ExtractD src idx)); 20649 effect(TEMP tmp); 20650 ins_encode %{ 20651 int vector_len = 2; 20652 int midx = 0x7 & $idx$$constant; 20653 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20654 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20655 } else if (midx == 1) { 20656 __ vpshufpd($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20657 } else if (midx > 1 && midx <= 7) { 20658 int extr_idx1 = midx / 2; 20659 int extr_idx2 = midx % 2; 20660 __ vextractf32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 20661 __ vpshufpd($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, extr_idx2, vector_len); 20662 } 20663 %} 20664 ins_pipe( pipe_slow ); 20665 %} 20666 20667 instruct extract4d(regD dst, vecY src, vecY tmp, immI idx) %{ 20668 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20669 match(Set dst (ExtractD src idx)); 20670 effect(TEMP tmp); 20671 ins_encode %{ 20672 int vector_len = 1; 20673 int midx = 0x3 & $idx$$constant; 20674 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20675 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20676 } else if (midx == 1) { 20677 __ vpshufpd($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20678 } else if (midx > 1 && midx <= 3) { 20679 __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20680 __ vpshufpd($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, midx - 2, vector_len); 20681 } 20682 20683 %} 20684 
ins_pipe( pipe_slow ); 20685 %} 20686 20687 instruct extract2d(regD dst, vecX src, immI idx) %{ 20688 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 2); 20689 match(Set dst (ExtractD src idx)); 20690 ins_encode %{ 20691 int vector_len = 0; 20692 int midx = 0x1 & $idx$$constant; 20693 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20694 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20695 } else if (midx >=1) { 20696 __ vpshufpd($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20697 } 20698 %} 20699 ins_pipe( pipe_slow ); 20700 %} 20701 20702 instruct extract1d(regD dst, vecD src, immI idx) %{ 20703 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 1); 20704 match(Set dst (ExtractD src idx)); 20705 ins_encode %{ 20706 int vector_len = 0; 20707 int midx = 0x1 & $idx$$constant; 20708 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20709 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20710 } 20711 %} 20712 ins_pipe( pipe_slow ); 20713 %} 20714 20715 instruct extract16f(regF dst, vecZ src, vecZ tmp, immI idx) %{ 20716 predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 16); 20717 match(Set dst (ExtractF src idx)); 20718 effect(TEMP tmp); 20719 ins_encode %{ 20720 int vector_len=2; 20721 int midx = 0xF & $idx$$constant; 20722 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20723 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20724 } else if (midx >= 1 && midx <= 3) { 20725 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20726 } else { 20727 int extr_idx1 = midx / 4; 20728 int extr_idx2 = midx % 4; 20729 __ vextractf32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 20730 __ vpshufps($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, extr_idx2, vector_len); 20731 } 20732 %} 20733 ins_pipe( pipe_slow ); 20734 %} 20735 20736 instruct extract8f(regF dst, vecY src, vecY tmp, immI idx) %{ 
20737 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8); 20738 match(Set dst (ExtractF src idx)); 20739 effect(TEMP tmp); 20740 ins_encode %{ 20741 int vector_len=1; 20742 int midx = 0x7 & $idx$$constant; 20743 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20744 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20745 } else if (midx >= 1 && midx <= 3) { 20746 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20747 } else if (midx >= 4) { 20748 __ vextractf128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20749 __ vpshufps($dst$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, midx - 4, vector_len); 20750 } 20751 %} 20752 ins_pipe( pipe_slow ); 20753 %} 20754 20755 instruct extract4f(regF dst, vecX src, immI idx) %{ 20756 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20757 match(Set dst (ExtractF src idx)); 20758 ins_encode %{ 20759 int vector_len=0; 20760 int midx = 0x3 & $idx$$constant; 20761 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20762 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20763 } else if (midx >= 1 && midx <= 3) { 20764 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20765 } 20766 %} 20767 ins_pipe( pipe_slow ); 20768 %} 20769 20770 instruct extract2f(regF dst, vecD src, immI idx) %{ 20771 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 2); 20772 match(Set dst (ExtractF src idx)); 20773 ins_encode %{ 20774 int vector_len=0; 20775 int midx = 0x1 & $idx$$constant; 20776 if (midx == 0 && $dst$$XMMRegister != $src$$XMMRegister) { 20777 __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); 20778 } else { 20779 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, midx, vector_len); 20780 } 20781 %} 20782 ins_pipe( pipe_slow ); 20783 %} 20784 20785 instruct extract8l(rRegL dst, vecZ src, vecZ tmp, immI idx) %{ 20786 predicate(UseAVX > 2 && 
n->in(1)->bottom_type()->is_vect()->length() == 8); 20787 match(Set dst (ExtractL src idx)); 20788 effect(TEMP tmp); 20789 ins_encode %{ 20790 int midx = 0x7 & $idx$$constant; 20791 if (midx == 0) { 20792 __ movdl($dst$$Register, $src$$XMMRegister); 20793 } else if (midx >= 1 && midx <= 3) { 20794 __ pextrq($dst$$Register, $src$$XMMRegister, midx); 20795 } 20796 else if (midx >= 4 && midx <= 7) { 20797 __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20798 __ pextrq($dst$$Register, $tmp$$XMMRegister, midx-4); 20799 } 20800 %} 20801 ins_pipe( pipe_slow ); 20802 %} 20803 20804 instruct extract4l(rRegL dst, vecY src, immI idx) %{ 20805 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20806 match(Set dst (ExtractL src idx)); 20807 ins_encode %{ 20808 int midx = 0x3 & $idx$$constant; 20809 if (midx == 0) { 20810 __ movdl($dst$$Register, $src$$XMMRegister); 20811 } else if (midx >= 1 && midx <= 3) { 20812 __ pextrq($dst$$Register, $src$$XMMRegister, midx); 20813 } 20814 %} 20815 ins_pipe( pipe_slow ); 20816 %} 20817 20818 instruct extract2l(rRegL dst, vecX src, immI idx) %{ 20819 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 2); 20820 match(Set dst (ExtractL src idx)); 20821 ins_encode %{ 20822 int midx = 0x1 & $idx$$constant; 20823 if (midx == 0) { 20824 __ movdl($dst$$Register, $src$$XMMRegister); 20825 } else if (midx >= 1) { 20826 __ pextrq($dst$$Register, $src$$XMMRegister, midx); 20827 } 20828 %} 20829 ins_pipe( pipe_slow ); 20830 %} 20831 20832 instruct extract1l(rRegL dst, vecD src, immI idx) %{ 20833 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 1); 20834 match(Set dst (ExtractL src idx)); 20835 ins_encode %{ 20836 int midx = 0x1 & $idx$$constant; 20837 if (midx == 0) { 20838 __ movdl($dst$$Register, $src$$XMMRegister); 20839 } 20840 %} 20841 ins_pipe( pipe_slow ); 20842 %} 20843 20844 instruct extract16i(rRegI dst, vecZ src, vecZ tmp, immI idx) %{ 20845 predicate(UseAVX > 
2 && n->in(1)->bottom_type()->is_vect()->length() == 16); 20846 match(Set dst (ExtractI src idx)); 20847 effect(TEMP tmp); 20848 ins_encode %{ 20849 int midx = 0xF & $idx$$constant; 20850 if (midx == 0) { 20851 __ movdl($dst$$Register, $src$$XMMRegister); 20852 } 20853 else if (midx >= 1 && midx <= 3) { 20854 __ pextrd($dst$$Register, $src$$XMMRegister, midx); 20855 } 20856 else { 20857 // Using 4 because there are 4 ints in 128-bit 20858 int extr_idx1 = midx / 4; 20859 int extr_idx2 = midx % 4; 20860 __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1); 20861 __ pextrd($dst$$Register, $tmp$$XMMRegister, extr_idx2); 20862 } 20863 %} 20864 ins_pipe( pipe_slow ); 20865 %} 20866 20867 instruct extract8i(rRegI dst, vecY src, vecY tmp, immI idx) %{ 20868 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8); 20869 match(Set dst (ExtractI src idx)); 20870 effect(TEMP tmp); 20871 ins_encode %{ 20872 int midx = 0x7 & $idx$$constant; 20873 if (midx == 0) { 20874 __ movdl($dst$$Register, $src$$XMMRegister); 20875 } else if (midx >= 1 && midx <= 3) { 20876 __ pextrd($dst$$Register, $src$$XMMRegister, midx); 20877 } else if (midx >= 4) { 20878 __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1); 20879 __ pextrd($dst$$Register, $tmp$$XMMRegister, midx - 4); 20880 } 20881 %} 20882 ins_pipe( pipe_slow ); 20883 %} 20884 20885 instruct extract4i(rRegI dst, vecX src, immI idx) %{ 20886 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4); 20887 match(Set dst (ExtractI src idx)); 20888 ins_encode %{ 20889 int midx = 0x3 & $idx$$constant; 20890 if (midx == 0) { 20891 __ movdl($dst$$Register, $src$$XMMRegister); 20892 } else if (midx >= 1 && midx <= 3) { 20893 __ pextrd($dst$$Register, $src$$XMMRegister, midx); 20894 } 20895 %} 20896 ins_pipe( pipe_slow ); 20897 %} 20898 20899 instruct extract2i(rRegI dst, vecD src, immI idx) %{ 20900 predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 2); 20901 
// --- Continuation: tail of an ExtractI instruct whose header lies above
//     this chunk (the 0x1 index mask suggests a 2-element int vector —
//     TODO confirm against the preceding lines of the full file).
  match(Set dst (ExtractI src idx));
  ins_encode %{
    int midx = 0x1 & $idx$$constant;
    if (midx == 0) {
      // Lane 0 moves out directly.
      __ movdl($dst$$Register, $src$$XMMRegister);
    } else if (midx >= 1) {
      __ pextrd($dst$$Register, $src$$XMMRegister, midx);
    }
  %}
  ins_pipe( pipe_slow );
%}

// ----- Extract one short element, sign-extended to int in a GPR ----------
// pextrw only reaches word lanes 0-7, so for wider vectors the containing
// 128-bit lane is first extracted into tmp.

instruct extract32s(rRegI dst, vecZ src, vecZ tmp, immI idx) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 32);
  match(Set dst (ExtractS src idx));
  effect(TEMP tmp);
  ins_encode %{
    int midx = 0x1F & $idx$$constant;
    if (midx == 0) {
      __ movdl($dst$$Register, $src$$XMMRegister);
      __ movswl($dst$$Register, $dst$$Register);
    } else if (midx >= 1 && midx <= 7) {
      __ pextrw($dst$$Register, $src$$XMMRegister, midx);
      __ movswl($dst$$Register, $dst$$Register);
    } else {
      int extr_idx1 = midx / 8;  // which 128-bit lane
      int extr_idx2 = midx % 8;  // word within that lane
      __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1);
      __ pextrw($dst$$Register, $tmp$$XMMRegister, extr_idx2);
      __ movswl($dst$$Register, $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct extract16s(rRegI dst, vecY src, vecY tmp, immI idx) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 16);
  match(Set dst (ExtractS src idx));
  effect(TEMP tmp);
  ins_encode %{
    int midx = 0xF & $idx$$constant;
    if (midx == 0) {
      __ movdl($dst$$Register, $src$$XMMRegister);
      __ movswl($dst$$Register, $dst$$Register);
    } else if (midx >= 1 && midx <= 7) {
      __ pextrw($dst$$Register, $src$$XMMRegister, midx);
      __ movswl($dst$$Register, $dst$$Register);
    } else if (midx >= 8 && midx <= 15) {
      // Upper 128-bit lane: pull it down first.
      __ vextracti128($tmp$$XMMRegister, $src$$XMMRegister, 0x1);
      __ pextrw($dst$$Register, $tmp$$XMMRegister, midx-8);
      __ movswl($dst$$Register, $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct extract8s(rRegI dst, vecX src, immI idx) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8);
  match(Set dst (ExtractS src idx));
  ins_encode %{
    int midx = 0x7 & $idx$$constant;
    if (midx == 0) {
      __ movdl($dst$$Register, $src$$XMMRegister);
      __ movswl($dst$$Register, $dst$$Register);
    } else if (midx >= 1) {
      __ pextrw($dst$$Register, $src$$XMMRegister, midx);
      __ movswl($dst$$Register, $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct extract4s(rRegI dst, vecD src, immI idx) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 4);
  match(Set dst (ExtractS src idx));
  ins_encode %{
    int midx = 0x3 & $idx$$constant;
    if (midx == 0) {
      __ movdl($dst$$Register, $src$$XMMRegister);
      __ movswl($dst$$Register, $dst$$Register);
    } else if (midx >= 1) {
      __ pextrw($dst$$Register, $src$$XMMRegister, midx);
      __ movswl($dst$$Register, $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// ----- Extract one byte element, sign-extended to int in a GPR -----------
// pextrb only reaches byte lanes 0-15; higher lanes go via a 128-bit
// lane extract into tmp.

instruct extract64b(rRegI dst, vecZ src, vecZ tmp, immI idx) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->is_vect()->length() == 64);
  match(Set dst (ExtractB src idx));
  effect(TEMP tmp);
  ins_encode %{
    int midx = 0x3F & $idx$$constant;
    if (midx == 0) {
      __ movdl($dst$$Register, $src$$XMMRegister);
      __ movsbl($dst$$Register, $dst$$Register);
    } else if (midx >= 1 && midx <= 15) {
      __ pextrb($dst$$Register, $src$$XMMRegister, midx);
      __ movsbl($dst$$Register, $dst$$Register);
    } else {
      int extr_idx1 = midx / 16;  // which 128-bit lane
      int extr_idx2 = midx % 16;  // byte within that lane
      __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1);
      __ pextrb($dst$$Register, $tmp$$XMMRegister, extr_idx2);
      __ movsbl($dst$$Register, $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct extract32b(rRegI dst, vecY src, vecY tmp, immI idx) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 32);
  match(Set dst (ExtractB src idx));
  effect(TEMP tmp);
  ins_encode %{
    int midx = 0x1F & $idx$$constant;
    if (midx == 0) {
      __ movdl($dst$$Register, $src$$XMMRegister);
      __ movsbl($dst$$Register, $dst$$Register);
    } else if (midx >= 1 && midx <= 15) {
      __ pextrb($dst$$Register, $src$$XMMRegister, midx);
      __ movsbl($dst$$Register, $dst$$Register);
    } else {
      int extr_idx1 = midx / 16;
      int extr_idx2 = midx % 16;
      __ vextracti32x4($tmp$$XMMRegister, $src$$XMMRegister, extr_idx1);
      __ pextrb($dst$$Register, $tmp$$XMMRegister, extr_idx2);
      __ movsbl($dst$$Register, $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct extract16b(rRegI dst, vecX src, immI idx) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 16);
  match(Set dst (ExtractB src idx));
  ins_encode %{
    int midx = 0xF & $idx$$constant;
    if (midx == 0) {
      __ movdl($dst$$Register, $src$$XMMRegister);
      __ movsbl($dst$$Register, $dst$$Register);
    } else if (midx >= 1) {
      __ pextrb($dst$$Register, $src$$XMMRegister, midx);
      __ movsbl($dst$$Register, $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct extract8b(rRegI dst, vecD src, immI idx) %{
  predicate(UseAVX > 0 && n->in(1)->bottom_type()->is_vect()->length() == 8);
  match(Set dst (ExtractB src idx));
  ins_encode %{
    int midx = 0x7 & $idx$$constant;
    if (midx == 0) {
      __ movdl($dst$$Register, $src$$XMMRegister);
      __ movsbl($dst$$Register, $dst$$Register);
    } else if (midx >= 1) {
      __ pextrb($dst$$Register, $src$$XMMRegister, midx);
      __ movsbl($dst$$Register, $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// ----- VectorMaskCmp, byte elements --------------------------------------
// ge/le/ne have no direct AVX/AVX2 compare, so they are synthesized as the
// inverse compare followed by a vpxor with all-ones.  The AVX-512 (64B)
// variants use an EVEX predicated compare into k2 and expand the k-mask to
// a byte mask with a zero-masked vmovdqu8.  k2 is hardcoded because the
// register allocator does not manage k registers.

instruct vcmpge32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  // was "\n " — normalized to "\n\t" to match the sibling instructs
  format %{ "vpcmpgtb $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8B(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16B(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne32B(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqb $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne64B(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqb k2,$src1,$src2\n\t"
            "vmovdqu8 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed64B" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ----- VectorMaskCmp, short elements -------------------------------------
// lt swaps the operands of vpcmpgtw; ge/le/ne invert gt/gt/eq via vpxor
// with all-ones.  The 32S (512-bit) variants require AVX-512 and use
// evpcmpw into k2 plus a zero-masked vmovdqu16 expansion.

instruct vcmpeq4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t! cmpeq packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src2,$src1\t! cmplt packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  // Format fixed: it previously read "vpcmpnlew ... cmpgt packed32S"
  // (copy-paste from vcmpgt32S) although the encoding uses Assembler::lt.
  format %{ "vpcmpltw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4S(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8S(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt16S(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtw $dst,$src1,$src2\t! cmpgt packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnlew k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmplew k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4S(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne8S(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne16S(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqw $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne32S(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqw k2,$src1,$src2\n\t"
            "vmovdqu16 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed32S" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ----- VectorMaskCmp, long elements --------------------------------------
// Same scheme as the short variants, using vpcmpeqq/vpcmpgtq and, for the
// 8L (512-bit) forms, evpcmpq into k2 with a zero-masked vmovdqu64 expansion.

instruct vcmpeq1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t! cmpeq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpeq8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpeqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpeq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::eq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src2,$src1\t! cmplt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmplt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  // Format fixed: it previously read "vpcmpnleq ... cmpgt packed8L"
  // (copy-paste from vcmpgt8L) although the encoding uses Assembler::lt.
  format %{ "vpcmpltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmplt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::lt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt1L(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt2L(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt4L(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  format %{ "vpcmpgtq $dst,$src1,$src2\t! cmpgt packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpgt8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpgt packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nle;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// --- vcmpge1L continues past the end of this chunk; only the visible
//     prefix is reproduced here.
instruct vcmpge1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t!
cmpge packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ge(src1,src2) = !lt(src1,src2): gt(src2,src1) followed by a full bit
// inversion via vpxor with an all-ones constant ($scratch holds its address).
instruct vcmpge2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpge4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src2,$src1\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpge packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit variant: EVEX provides nlt (>=) directly, so no inversion is
// needed; the k-mask is expanded to all-ones lanes in $dst.
instruct vcmpge8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpnltq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpge packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::nlt;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// le(src1,src2) = !gt(src1,src2): gt(src1,src2) followed by bit inversion.
instruct vcmple1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmple4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpgtq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmple packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpgtq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit variant: EVEX provides le directly.
instruct vcmple8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpleq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmple packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::le;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ne(src1,src2) = !eq(src1,src2): eq compare followed by bit inversion.
instruct vcmpne1L(vecD dst, vecD src1, vecD src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed1L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne2L(vecX dst, vecX src1, vecX src2, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpne4L(vecY dst, vecY src1, vecY src2, rRegL scratch) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP scratch);
  format %{ "vpcmpeqq $dst,$src1,$src2\n\t"
            "vpxor $dst,$dst,0xFFFFFFFF\t! cmpneq packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpcmpeqq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit variant: EVEX provides neq directly.
instruct vcmpne8L(vecZ dst, vecZ src1, vecZ src2, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
            n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorMaskCmp src1 src2));
  effect(TEMP dst, TEMP scratch);
  format %{ "vpcmpneqq k2,$src1,$src2\n\t"
            "vmovdqu64 $dst, k2{z}, 0xFFFFFFFFFF \t! cmpneq packed8L" %}
  ins_encode %{
    int vector_len = 2;
    Assembler::ComparisonPredicate cmp = Assembler::neq;
    KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
    KRegister mask = k0; // The comparison itself is not being masked.
    __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vector_len);
    __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// SSE4.1 blendvps takes its mask implicitly in xmm0, hence the rxmm0 TEMP
// and the conditional copy of $mask into xmm0 in the encoding.
instruct blendvps2F(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "blendvps $dst,$src,$mask\t! 
packed2F" %} 21998 ins_encode %{ 21999 if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { 22000 __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); 22001 } 22002 __ blendvps($dst$$XMMRegister, $src$$XMMRegister); 22003 %} 22004 ins_pipe( pipe_slow ); 22005 %} 22006 22007 instruct vblendvps2F(vecD dst, vecD src1, vecD src2, vecD mask) %{ 22008 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 22009 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22010 format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed2F" %} 22011 ins_encode %{ 22012 int vector_len = 0; 22013 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22014 %} 22015 ins_pipe( pipe_slow ); 22016 %} 22017 22018 instruct blendvps4F(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ 22019 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 22020 match(Set dst (VectorBlend (Binary dst src) mask)); 22021 effect(TEMP xmm_0); 22022 format %{ "blendvps $dst,$src,$mask\t! packed4F" %} 22023 ins_encode %{ 22024 if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { 22025 __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); 22026 } 22027 __ blendvps($dst$$XMMRegister, $src$$XMMRegister); 22028 %} 22029 ins_pipe( pipe_slow ); 22030 %} 22031 22032 instruct vblendvps4F(vecX dst, vecX src1, vecX src2, vecX mask) %{ 22033 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 22034 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22035 format %{ "vblendvps $dst,$src1,$src2,$mask\t! 
packed4F" %} 22036 ins_encode %{ 22037 int vector_len = 0; 22038 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22039 %} 22040 ins_pipe( pipe_slow ); 22041 %} 22042 22043 instruct vblendvps8F(vecY dst, vecY src1, vecY src2, vecY mask) %{ 22044 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 22045 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22046 format %{ "vblendvps $dst,$src1,$src2,$mask\t! packed8F" %} 22047 ins_encode %{ 22048 int vector_len = 1; 22049 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22050 %} 22051 ins_pipe( pipe_slow ); 22052 %} 22053 22054 instruct vblendvps16F(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ 22055 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); 22056 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22057 effect(TEMP scratch); 22058 format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t" 22059 "vblendmps $dst,k2,$src1,$src2\t! blend packed16F " %} 22060 ins_encode %{ 22061 int vector_len = 2; 22062 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 22063 __ evpcmpeqd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 22064 __ evblendmps($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); 22065 %} 22066 ins_pipe( pipe_slow ); 22067 %} 22068 22069 instruct vblendvpd8D(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ 22070 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); 22071 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22072 effect(TEMP scratch); 22073 format %{ "evpcmpeqq k2,$mask,0xFFFFFFFF\n\t" 22074 "vblendmpd $dst,k2,$src1,$src2\t! 
blend packed16F " %} 22075 ins_encode %{ 22076 int vector_len = 2; 22077 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 22078 __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register); 22079 __ evblendmpd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); 22080 %} 22081 ins_pipe( pipe_slow ); 22082 %} 22083 22084 instruct vpblendmb64B(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ 22085 predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE && VM_Version::supports_avx512bw()); 22086 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22087 effect(TEMP scratch); 22088 format %{ "vpcmpeqb k2,$mask,0xFFFFFFFF\n\t" 22089 "vpblendmb $dst,k2,$src1,$src2\t! blend packed64B " %} 22090 ins_encode %{ 22091 int vector_len = 2; 22092 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 22093 __ evpcmpb(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register); 22094 __ evpblendmb($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); 22095 %} 22096 ins_pipe( pipe_slow ); 22097 %} 22098 22099 instruct vpblendmw32S(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ 22100 predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT && VM_Version::supports_avx512bw()); 22101 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22102 effect(TEMP scratch); 22103 format %{ "vpcmpeqw k2,$mask,0xFFFFFFFF\n\t" 22104 "vpblendmw $dst,k2,$src1,$src2\t! blend packed32S " %} 22105 ins_encode %{ 22106 int vector_len = 2; 22107 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
22108 __ evpcmpw(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register); 22109 __ evpblendmw($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); 22110 %} 22111 ins_pipe( pipe_slow ); 22112 %} 22113 22114 instruct vpblendmd16I(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ 22115 predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 22116 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22117 effect(TEMP scratch); 22118 format %{ "vpcmpeqd k2,$mask,0xFFFFFFFF\n\t" 22119 "vpblendmd $dst,k2,$src1,$src2\t! blend packed16I " %} 22120 ins_encode %{ 22121 int vector_len = 2; 22122 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 22123 __ evpcmpd(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register); 22124 __ evpblendmd($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); 22125 %} 22126 ins_pipe( pipe_slow ); 22127 %} 22128 22129 instruct vpblendmq8L(vecZ dst, vecZ src1, vecZ src2, vecZ mask, rRegL scratch) %{ 22130 predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); 22131 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22132 effect(TEMP scratch); 22133 format %{ "vpcmpeqq k2,$mask,0xFFFFFFFF\n\t" 22134 "vpblendmq $dst,k2,$src1,$src2\t! blend packed8L " %} 22135 ins_encode %{ 22136 int vector_len = 2; 22137 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
22138 __ evpcmpq(ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vector_len, $scratch$$Register); 22139 __ evpblendmq($dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vector_len); 22140 %} 22141 ins_pipe( pipe_slow ); 22142 %} 22143 22144 22145 instruct pblendvb2I(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{ 22146 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 22147 match(Set dst (VectorBlend (Binary dst src) mask)); 22148 effect(TEMP xmm_0); 22149 format %{ "vpblendvb $dst,$src,$mask\t! blend packed2I" %} 22150 ins_encode %{ 22151 if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { 22152 __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); 22153 } 22154 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 22155 %} 22156 ins_pipe( pipe_slow ); 22157 %} 22158 22159 instruct vpblendvb2I(vecD dst, vecD src1, vecD src2, vecD mask) %{ 22160 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 22161 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22162 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2I" %} 22163 ins_encode %{ 22164 int vector_len = 0; 22165 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22166 %} 22167 ins_pipe( pipe_slow ); 22168 %} 22169 22170 instruct pblendvb4I(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ 22171 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 22172 match(Set dst (VectorBlend (Binary dst src) mask)); 22173 effect(TEMP xmm_0); 22174 format %{ "vpblendvb $dst,$src,$mask\t! 
blend packed4I" %} 22175 ins_encode %{ 22176 if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { 22177 __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); 22178 } 22179 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 22180 %} 22181 ins_pipe( pipe_slow ); 22182 %} 22183 22184 instruct vpblendvb4I(vecX dst, vecX src1, vecX src2, vecX mask) %{ 22185 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 22186 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22187 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4I" %} 22188 ins_encode %{ 22189 int vector_len = 0; 22190 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22191 %} 22192 ins_pipe( pipe_slow ); 22193 %} 22194 22195 instruct vpblendvb8I(vecY dst, vecY src1, vecY src2, vecY mask) %{ 22196 predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); 22197 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22198 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8I" %} 22199 ins_encode %{ 22200 int vector_len = 1; 22201 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22202 %} 22203 ins_pipe( pipe_slow ); 22204 %} 22205 22206 instruct pblendvb8B(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{ 22207 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 22208 match(Set dst (VectorBlend (Binary dst src) mask)); 22209 effect(TEMP xmm_0); 22210 format %{ "pblendvb $dst,$src,$mask\t! 
blend packed8B" %} 22211 ins_encode %{ 22212 if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { 22213 __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); 22214 } 22215 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 22216 %} 22217 ins_pipe( pipe_slow ); 22218 %} 22219 22220 instruct vpblendvb8B(vecD dst, vecD src1, vecD src2, vecD mask) %{ 22221 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 22222 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22223 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8B" %} 22224 ins_encode %{ 22225 int vector_len = 0; 22226 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22227 %} 22228 ins_pipe( pipe_slow ); 22229 %} 22230 22231 instruct pblendvb16B(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ 22232 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 22233 match(Set dst (VectorBlend (Binary dst src) mask)); 22234 effect(TEMP xmm_0); 22235 format %{ "pblendvb $dst,$src,$mask\t! blend packed16B" %} 22236 ins_encode %{ 22237 if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { 22238 __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); 22239 } 22240 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 22241 %} 22242 ins_pipe( pipe_slow ); 22243 %} 22244 22245 instruct vpblendvb16B(vecX dst, vecX src1, vecX src2, vecX mask) %{ 22246 predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 22247 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22248 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! 
blend packed16B" %} 22249 ins_encode %{ 22250 int vector_len = 0; 22251 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22252 %} 22253 ins_pipe( pipe_slow ); 22254 %} 22255 22256 instruct vpblendvb32B(vecY dst, vecY src1, vecY src2, vecY mask) %{ 22257 predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); 22258 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22259 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed32B" %} 22260 ins_encode %{ 22261 int vector_len = 1; 22262 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22263 %} 22264 ins_pipe( pipe_slow ); 22265 %} 22266 22267 instruct pblendvb4S(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{ 22268 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 22269 match(Set dst (VectorBlend (Binary dst src) mask)); 22270 effect(TEMP xmm_0); 22271 format %{ "pblendvb $dst,$src,$mask\t! blend packed4S" %} 22272 ins_encode %{ 22273 if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { 22274 __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); 22275 } 22276 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 22277 %} 22278 ins_pipe( pipe_slow ); 22279 %} 22280 22281 instruct vpblendvb4S(vecD dst, vecD src1, vecD src2, vecD mask) %{ 22282 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 22283 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22284 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! 
blend packed4S" %} 22285 ins_encode %{ 22286 int vector_len = 0; 22287 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22288 %} 22289 ins_pipe( pipe_slow ); 22290 %} 22291 22292 instruct pblendvb8S(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{ 22293 predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 22294 match(Set dst (VectorBlend (Binary dst src) mask)); 22295 effect(TEMP xmm_0); 22296 format %{ "pblendvb $dst,$src,$mask\t! blend packed8S" %} 22297 ins_encode %{ 22298 if ($mask$$XMMRegister != $xmm_0$$XMMRegister) { 22299 __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); 22300 } 22301 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); 22302 %} 22303 ins_pipe( pipe_slow ); 22304 %} 22305 22306 instruct vpblendvb8S(vecX dst, vecX src1, vecX src2, vecX mask) %{ 22307 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 22308 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22309 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed8S" %} 22310 ins_encode %{ 22311 int vector_len = 0; 22312 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len); 22313 %} 22314 ins_pipe( pipe_slow ); 22315 %} 22316 22317 instruct vpblendvb16S(vecY dst, vecY src1, vecY src2, vecY mask) %{ 22318 predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); 22319 match(Set dst (VectorBlend (Binary src1 src2) mask)); 22320 format %{ "vpblendvb $dst,$src1,$src2,$mask\t! 
blend packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Vector blend (select) rules: dst[i] = mask[i] ? src2[i] : src1[i].
// The SSE4.1 forms (pblendvb/blendvpd) read the mask implicitly from xmm0,
// so a TEMP rxmm0 operand is reserved and the mask is copied into it when
// the allocator placed the mask in some other register.

// Blend of one long lane, SSE4.1 only (UseSSE > 3, no AVX).
instruct pblendvb1L(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed1L" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister); // pblendvb reads mask from xmm0
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX blend of one long lane; mask is an explicit operand (no xmm0 tie).
instruct vpblendvb1L(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed1L" %}
  ins_encode %{
    int vector_len = 0; // 128-bit encoding
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Blend of two long lanes, SSE4.1 (mask implicitly in xmm0).
instruct pblendvb2L(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "pblendvb $dst,$src,$mask\t! blend packed2L" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX blend of two long lanes (128-bit).
instruct vpblendvb2L(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 blend of four long lanes (256-bit).
instruct vpblendvb4L(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vpblendvb $dst,$src1,$src2,$mask\t! blend packed4L" %}
  ins_encode %{
    int vector_len = 1; // 256-bit encoding
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Blend of one double lane, SSE4.1 (mask implicitly in xmm0).
instruct blendvpd1D(vecD dst, vecD src, vecD mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "blendvpd $dst,$src,$mask\t! packed1D" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX blend of one double lane.
instruct vblendvpd1D(vecD dst, vecD src1, vecD src2, vecD mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed1D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Blend of two double lanes, SSE4.1 (mask implicitly in xmm0).
instruct blendvpd2D(vecX dst, vecX src, vecX mask, rxmm0 xmm_0) %{
  predicate(UseAVX == 0 && UseSSE > 3 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary dst src) mask));
  effect(TEMP xmm_0);
  format %{ "blendvpd $dst,$src,$mask\t! packed2D" %}
  ins_encode %{
    if ($mask$$XMMRegister != $xmm_0$$XMMRegister) {
      __ movdqu($xmm_0$$XMMRegister, $mask$$XMMRegister);
    }
    __ blendvpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX blend of two double lanes (128-bit).
instruct vblendvpd2D(vecX dst, vecX src1, vecX src2, vecX mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! 
packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX blend of four double lanes (256-bit).
instruct vblendvpd4D(vecY dst, vecY src1, vecY src2, vecY mask) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vblendvpd $dst,$src1,$src2,$mask\t! packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- NEG --------------------------------------
// a = -a
// Integer lanes are negated as (0 - src): zero dst with pxor, then psub.

instruct vneg2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 2);
  match(Set dst (NegVI src));
  effect(TEMP dst); // dst is clobbered (zeroed) before src is read
  format %{ "pxor $dst,$dst\n\t"
            "psubd $dst, $src\t! neg packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length() == 4);
  match(Set dst (NegVI src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubd $dst, $src\t! neg packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form uses a separate TEMP so dst and src may overlap.
instruct vneg8I_reg(vecY dst, vecY src, vecY tmp) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (NegVI src));
  effect(TEMP tmp);
  format %{ "vpxor $tmp,$tmp,$tmp\n\t"
            "vpsubd $dst,$tmp,$src\t! neg packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg16I_reg(vecZ dst, vecZ src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (NegVI src));
  effect(TEMP tmp);
  format %{ "vpxor $tmp,$tmp,$tmp\n\t"
            "vpsubd $dst,$tmp,$src\t! neg packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2; // 512-bit encoding
    __ vpxor($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Double negation flips the IEEE sign bit by xor-ing with 0x8000000000000000.
instruct vneg1D(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegVD dst));
  ins_cost(150);
  format %{ "xorpd $dst,[0x8000000000000000] \t# $dst = -$dst neg packed1D" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct vneg1D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 1);
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed1D" %}
  ins_cost(150);
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg2D_reg(vecX dst) %{
  predicate((UseSSE>=2));
  match(Set dst (NegVD dst));
  ins_cost(150);
  format %{ "xorpd $dst,[0x8000000000000000]\t# $dst = -$dst neg packed2D" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(vector_double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}


instruct vneg4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
match(Set dst (NegVD src));
  format %{ "vxorpd $dst,$src\t# $dst = -$src neg packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Float negation flips the per-lane IEEE sign bit (xor with 0x80000000,
// loaded from a static constant table).
instruct vneg2F_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (NegVF dst));
  format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed2F" %}
  ins_cost(150);
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (NegVF dst));
  format %{ "xorps $dst,[0x80000000]\t# $dst = -$dst neg packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(vector_float_signflip()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (NegVF src));
  format %{ "vxorps $dst,$src\t# $dst = -$src neg packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vneg16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (NegVF src));
  format %{ "vxorps $dst,$src\t# $dst = -$src neg packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vxorps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signflip()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
// Integer absolute value via SSSE3/AVX pabs* family.

instruct vabs8B_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed8B" %}
  ins_cost(150);
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16B_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "pabsb $dst,$src\t# $dst = |$src| abs packed16B" %}
  ins_cost(150);
  ins_encode %{
    __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32B_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "vpabsb $dst,$src\t# $dst = |$src| abs packed32B" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs64B_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AbsV src));
  format %{ "evpabsb $dst,$src\t# $dst = |$src| abs packed64B" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4S_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 &&
n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed4S" %}
  ins_cost(150);
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8S_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "pabsw $dst,$src\t# $dst = |$src| abs packed8S" %}
  ins_cost(150);
  ins_encode %{
    __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16S_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "vpabsw $dst,$src\t# $dst = |$src| abs packed16S" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs32S_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AbsV src));
  format %{ "evpabsw $dst,$src\t# $dst = |$src| abs packed32S" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2I_reg(vecD dst, vecD src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed2I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4I_reg(vecX dst, vecX src) %{
  predicate(UseSSE > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packed4I" %}
  ins_cost(150);
  ins_encode %{
    __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8I_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "vpabsd $dst,$src\t# $dst = |$src| abs packed8I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16I_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AbsV src));
  format %{ "evpabsd $dst,$src\t# $dst = |$src| abs packed16I" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long abs requires AVX-512 (evpabsq) even at 128/256-bit widths.
instruct vabs2L_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsV src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed2L" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4L_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsV src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed4L" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8L_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AbsV src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packed8L" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// FP abs clears the IEEE sign bit by and-ing with 0x7FF..F per lane.
instruct vabs1D_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 1);
  match(Set dst (AbsVD dst));
  format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed1D" %}
  ins_cost(150);
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs2D_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVD dst));
  format %{ "andpd $dst,[0x7FFFFFFFFFFFFFFF]\t# $dst = |$dst| abs packed2D" %}
  ins_cost(150);
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(vector_double_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVD src));
  format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 
ExternalAddress(vector_double_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVD src));
  format %{ "vandpd $dst,$src\t# $dst = |$src| abs packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_double_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Float abs clears the per-lane sign bit (and with 0x7FFFFFFF).
instruct vabs2F_reg(vecD dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AbsVF dst));
  format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed2F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs4F_reg(vecX dst) %{
  predicate(UseSSE > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  // Fix: format previously said "vandps" but the encoding emits the SSE
  // form andps (consistent with vabs2F_reg above).
  format %{ "andps $dst,[0x7FFFFFFF]\t# $dst = |$dst| abs packed4F" %}
  ins_cost(150);
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(vector_float_signmask()));
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AbsVF src));
  format %{ "vandps $dst,$src\t# $dst = |$src| abs packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_float_signmask()), vector_len);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- NOT --------------------------------------
// Bitwise complement: xor the vector with an all-ones constant.

instruct vnot4B(vecS dst, vecS src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP dst); // dst is loaded with the all-ones constant first
  format %{ "pxor $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// AVX form xors directly against the memory constant (scratch holds the
// constant's address for the RIP-relative/absolute reach).
instruct vnot4B_reg(vecS dst, vecS src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src\t! not vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B(vecD dst, vecD src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (8 bytes)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot8B_reg(vecD dst, vecD src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! 
not vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B(vecX dst, vecX src) %{
  predicate(UseSSE > 1 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP dst);
  format %{ "pxor $dst,$src\t! not vectors (16 bytes)" %}
  ins_encode %{
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_all_bits_set()));
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot16B_reg(vecX dst, vecX src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot32B_reg(vecY dst, vecY src, rRegL scratch) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vnot64B_reg(vecZ dst, vecZ src, rRegL scratch) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (NotV src));
  effect(TEMP scratch);
  format %{ "vpxor $dst,$src,0xFFFFFFFF \t! not vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// VectorTest rules: vptest sets CF/ZF from src1 & src2; the requested
// condition (carrySet / notZero) is materialized into an int 0/1 via
// setb + movzbl.
instruct vptest4inae(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2 ));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2 ));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 0;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
  match(Set dst (VectorTest src1 src2 ));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" 
%}
  ins_encode %{
    int vector_len = 1;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::carrySet, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
  match(Set dst (VectorTest src1 src2 ));
  format %{ "vptest $src1,$src2\n\t"
            "setb $dst\t!" %}
  ins_encode %{
    int vector_len = 1;
    __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ setb(Assembler::notZero, $dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// VectorLoadMask rules: turn a 0/1 byte vector into an all-zeros/all-ones
// lane mask by computing 0 - x per lane (0 -> 0x00, 1 -> 0xFF).
instruct loadmask8b(vecD dst, vecD src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (8B to 8B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16b(vecX dst, vecX src) %{
  predicate(UseSSE >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  // Fix: format previously claimed AVX "vpxor"/"vpsubb" but the encoding
  // emits the SSE forms, matching loadmask8b above.
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\t! load mask (16B to 16B)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32b(vecY dst, vecY src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (32B to 32B)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask64b(vecZ dst, vecZ src) %{
  predicate(UseAVX > 0 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\t! load mask (64B to 64B)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Widening variants: negate the byte mask, then sign-extend to the wider
// lane size (0xFF -> 0xFFFF etc.).
instruct loadmask4s(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! 
load mask (4B to 4S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8s(vecX dst, vecD src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbw $dst\t! load mask (8B to 8S)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask16s(vecY dst, vecX src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (16B to 16S)" %}
  ins_encode %{
    int vector_len = 1;
    // xor/sub operate on the narrow (128-bit) byte mask, then widen.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask32s(vecZ dst, vecY src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbw $dst\t! load mask (32B to 32S)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 1);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Int/float masks share the same byte->dword widening (mask shape is per
// 32-bit lane for both).
instruct loadmask2i(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (2B to 2I)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4i(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbd $dst\t! load mask (4B to 4I)" %}
  ins_encode %{
    int vector_len = 0; // note: unused by the SSE encodings below
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8i(vecY dst, vecD src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbd $dst\t! load mask (8B to 8I)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit form widens first (zero-extend into tmp), then negates at the
// dword size, since a 64-byte vpsubb would need avx512bw.
instruct loadmask16i(vecZ dst, vecX src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbd $tmp,$src\n\t"
            "vpsubd $dst,$tmp\t! load mask (16B to 16I)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbd($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Long/double masks: byte->qword widening.
instruct loadmask1l(vecD dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 1 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (1B to 1L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask2l(vecX dst, vecS src) %{
  predicate(UseSSE >= 4 && n->as_Vector()->length() == 2 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "pxor $dst,$dst\n\t"
            "psubb $dst,$src\n\t"
            "pmovsxbq $dst\t! load mask (2B to 2L)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
    __ pmovsxbq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask4l(vecY dst, vecS src) %{
  predicate(UseAVX >= 2 && n->as_Vector()->length() == 4 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vpxor $dst,$dst\n\t"
            "vpsubb $dst,$src\n\t"
            "vpmovsxbq $dst\t! load mask (4B to 4L)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
    __ vpsubb($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, 0);
    __ vpmovsxbq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadmask8l(vecZ dst, vecD src, vecZ tmp) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 &&
            (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
             n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst, TEMP tmp);
  format %{ "vpxor $dst,$dst\n\t"
            "vpmovzxbq $tmp,$src\n\t"
            "vpsubq $dst,$tmp\t! load mask (8B to 8L)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovzxbq($tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// VectorStoreMask rules: turn an all-ones/all-zeros byte mask back into a
// 0/1 byte vector (abs of -1 is 1).
instruct storemask8b(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (8B to 8B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask16b(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! store mask (16B to 16B)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct storemask32b(vecY dst, vecY src) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1);
  match(Set dst (VectorStoreMask src));
  format %{ "vpabsb $dst,$src\t! 
store mask (32B to 32B)" %} 23319 ins_encode %{ 23320 int vector_len = 1; 23321 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23322 %} 23323 ins_pipe( pipe_slow ); 23324 %} 23325 23326 instruct storemask64b(vecZ dst, vecZ src, rRegL scratch) %{ 23327 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 1); 23328 match(Set dst (VectorStoreMask src)); 23329 effect(TEMP scratch); 23330 format %{ "vpcmpeqb k2,$src,0xFFFFFFFF\n\t" 23331 "vmovdqub $dst,k2,0x01010101\t! store mask (64B to 64B)" %} 23332 ins_encode %{ 23333 int vector_len = 2; 23334 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 23335 Assembler::ComparisonPredicate cp = Assembler::eq; 23336 __ evpcmpb(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 23337 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, vector_len, $scratch$$Register); 23338 %} 23339 ins_pipe( pipe_slow ); 23340 %} 23341 23342 instruct storemask4s(vecS dst, vecD src) %{ 23343 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 23344 match(Set dst (VectorStoreMask src)); 23345 format %{ "vpabsw $dst,$src\n\t" 23346 "vpackuswb $dst,$dst,$dst\t! store mask (4S to 4B)" %} 23347 ins_encode %{ 23348 int vector_len = 0; 23349 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23350 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23351 %} 23352 ins_pipe( pipe_slow ); 23353 %} 23354 23355 instruct storemask8s(vecD dst, vecX src) %{ 23356 predicate(UseAVX > 0 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 23357 match(Set dst (VectorStoreMask src)); 23358 format %{ "vpabsw $dst,$src\n\t" 23359 "vpackuswb $dst,$dst,$dst\t! 
store mask (8S to 8B)" %} 23360 ins_encode %{ 23361 int vector_len = 0; 23362 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23363 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23364 %} 23365 ins_pipe( pipe_slow ); 23366 %} 23367 23368 instruct storemask16s(vecX dst, vecY src, vecY tmp) %{ 23369 predicate(UseAVX > 1 && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 23370 match(Set dst (VectorStoreMask src)); 23371 effect(TEMP dst, TEMP tmp); 23372 format %{ "vpabsw $dst,$src\n\t" 23373 "vextracti128 $tmp,$dst\n\t" 23374 "vpackuswb $dst,$dst,$tmp\t! store mask (16S to 16B)" %} 23375 ins_encode %{ 23376 int vector_len = 1; 23377 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23378 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 23379 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 23380 %} 23381 ins_pipe( pipe_slow ); 23382 %} 23383 23384 instruct storemask32s(vecY dst, vecZ src, rRegL scratch) %{ 23385 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 2); 23386 match(Set dst (VectorStoreMask src)); 23387 effect(TEMP scratch); 23388 format %{ "vpcmpeqw k2,$src,0xFFFFFFFF\n\t" 23389 "vmovdqub $dst,k2,0x01010101\t! store mask (32S to 32B)" %} 23390 ins_encode %{ 23391 int vector_len = 2; 23392 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 23393 Assembler::ComparisonPredicate cp = Assembler::eq; 23394 __ evpcmpw(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 23395 // The dst is 256-bit - thus we can do a smaller move. 
23396 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 1, $scratch$$Register); 23397 %} 23398 ins_pipe( pipe_slow ); 23399 %} 23400 23401 23402 instruct storemask2i(vecS dst, vecD src) %{ 23403 predicate(UseAVX > 0 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 23404 match(Set dst (VectorStoreMask src)); 23405 format %{ "vpabsd $dst,$src\n\t" 23406 "vpackusdw $dst,$dst,$dst\n\t" 23407 "vpackuswb $dst,$dst,$dst\t! store mask (2I to 2B)" %} 23408 ins_encode %{ 23409 int vector_len = 0; 23410 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23411 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23412 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23413 %} 23414 ins_pipe( pipe_slow ); 23415 %} 23416 23417 instruct storemask4i(vecS dst, vecX src) %{ 23418 predicate(UseAVX > 0 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 23419 match(Set dst (VectorStoreMask src)); 23420 format %{ "vpabsd $dst,$src\n\t" 23421 "vpackusdw $dst,$dst,$dst\n\t" 23422 "vpackuswb $dst,$dst,$dst\t! 
store mask (4I to 4B)" %} 23423 ins_encode %{ 23424 int vector_len = 0; 23425 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23426 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23427 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23428 %} 23429 ins_pipe( pipe_slow ); 23430 %} 23431 23432 instruct storemask8i(vecD dst, vecY src, vecY tmp) %{ 23433 predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 23434 match(Set dst (VectorStoreMask src)); 23435 effect(TEMP dst, TEMP tmp); 23436 format %{ "vpxor $dst,$dst\n\t" 23437 "vpsubd $dst,$src\n\t" 23438 "vextracti128 $tmp,$dst\n\t" 23439 "vpackusdw $dst,$dst,$tmp\n\t" 23440 "vpackuswb $dst,$dst,$dst\t! store mask (8I to 8B)" %} 23441 ins_encode %{ 23442 int vector_len = 1; 23443 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23444 __ vpsubd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister, vector_len); 23445 __ vextracti128($tmp$$XMMRegister, $dst$$XMMRegister, 0x1); 23446 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len); 23447 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 23448 %} 23449 ins_pipe( pipe_slow ); 23450 %} 23451 23452 instruct storemask16i(vecX dst, vecZ src, rRegL scratch) %{ 23453 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 4); 23454 match(Set dst (VectorStoreMask src)); 23455 effect(TEMP scratch); 23456 format %{ "vpcmpeqd k2,$src,0xFFFFFFFF\n\t" 23457 "vmovdqub $dst,k2,0x01010101\t! store mask (16I to 16B)" %} 23458 ins_encode %{ 23459 int vector_len = 2; 23460 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 
23461 __ evpcmpeqd(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), vector_len, $scratch$$Register); 23462 // The dst is only 128-bit - thus we can do a smaller move. 23463 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 0, $scratch$$Register); 23464 %} 23465 ins_pipe( pipe_slow ); 23466 %} 23467 23468 instruct storemask1l(vecS dst, vecD src) %{ 23469 predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 23470 match(Set dst (VectorStoreMask src)); 23471 format %{ "vpabsd $dst,$src\n\t" 23472 "vpackusdw $dst,$dst,$dst\n\t" 23473 "vpackuswb $dst,$dst,$dst\t! store mask (1L to 1B)" %} 23474 ins_encode %{ 23475 int vector_len = 0; 23476 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23477 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23478 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23479 %} 23480 ins_pipe( pipe_slow ); 23481 %} 23482 23483 instruct storemask2l(vecS dst, vecX src) %{ 23484 predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 23485 match(Set dst (VectorStoreMask src)); 23486 format %{ "vpshufd $dst,$src,0x8\n\t" 23487 "vpabsd $dst,$dst\n\t" 23488 "vpackusdw $dst,$dst,$dst\n\t" 23489 "vpackuswb $dst,$dst,$dst\t! 
store mask (2L to 2B)" %} 23490 ins_encode %{ 23491 int vector_len = 0; 23492 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8, vector_len); 23493 __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23494 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23495 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23496 %} 23497 ins_pipe( pipe_slow ); 23498 %} 23499 23500 instruct storemask4l(vecS dst, vecY src, rRegL scratch) %{ 23501 predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 23502 match(Set dst (VectorStoreMask src)); 23503 effect(TEMP scratch, TEMP dst); 23504 format %{ "vmovdqu $dst,[0,2,4,6,1,3,5,7]\n\t" 23505 "vpermd $dst,$dst,$src," 23506 "vpabsd $dst,$dst\n\t" 23507 "vpackusdw $dst,$dst,$dst\n\t" 23508 "vpackuswb $dst,$dst,$dst\t! store mask (4L to 4B)" %} 23509 ins_encode %{ 23510 // vpermd and load are 256-bit, but all others are 128-bit instructions. 23511 int vector_len = 0; 23512 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_long_perm_mask()), $scratch$$Register); 23513 __ vpermd($dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister); 23514 __ vpabsd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23515 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23516 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 23517 %} 23518 ins_pipe( pipe_slow ); 23519 %} 23520 23521 instruct storemask8l(vecD dst, vecZ src, rRegL scratch) %{ 23522 predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8 && static_cast<const VectorStoreMaskNode*>(n)->GetInputMaskSize() == 8); 23523 match(Set dst (VectorStoreMask src)); 23524 effect(TEMP scratch); 23525 format %{ "vpcmpeqq k2,$src,0xFFFFFFFF\n\t" 23526 "vmovdqub $dst,k2,0x01010101\t! 
store mask (8L to 8B)" %} 23527 ins_encode %{ 23528 int vector_len = 2; 23529 KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation. 23530 Assembler::ComparisonPredicate cp = Assembler::eq; 23531 __ evpcmpq(ktmp, k0, $src$$XMMRegister, ExternalAddress(vector_all_bits_set()), cp, vector_len, $scratch$$Register); 23532 // The dst is only 128-bit - thus we can do a smaller move. 23533 __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_byte_bitset()), false, 0, $scratch$$Register); 23534 %} 23535 ins_pipe( pipe_slow ); 23536 %} 23537 23538 // --------------------------------- FMA -------------------------------------- 23539 23540 // a * b + c 23541 instruct vfma2D_reg(vecX a, vecX b, vecX c) %{ 23542 predicate(UseFMA && n->as_Vector()->length() == 2); 23543 match(Set c (FmaVD c (Binary a b))); 23544 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 23545 ins_cost(150); 23546 ins_encode %{ 23547 int vector_len = 0; 23548 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 23549 %} 23550 ins_pipe( pipe_slow ); 23551 %} 23552 23553 // a * b + c 23554 instruct vfma2D_mem(vecX a, memory b, vecX c) %{ 23555 predicate(UseFMA && n->as_Vector()->length() == 2); 23556 match(Set c (FmaVD c (Binary a (LoadVector b)))); 23557 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %} 23558 ins_cost(150); 23559 ins_encode %{ 23560 int vector_len = 0; 23561 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 23562 %} 23563 ins_pipe( pipe_slow ); 23564 %} 23565 23566 23567 // a * b + c 23568 instruct vfma4D_reg(vecY a, vecY b, vecY c) %{ 23569 predicate(UseFMA && n->as_Vector()->length() == 4); 23570 match(Set c (FmaVD c (Binary a b))); 23571 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 23572 ins_cost(150); 23573 ins_encode %{ 23574 int vector_len = 1; 23575 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, 
vector_len); 23576 %} 23577 ins_pipe( pipe_slow ); 23578 %} 23579 23580 // a * b + c 23581 instruct vfma4D_mem(vecY a, memory b, vecY c) %{ 23582 predicate(UseFMA && n->as_Vector()->length() == 4); 23583 match(Set c (FmaVD c (Binary a (LoadVector b)))); 23584 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %} 23585 ins_cost(150); 23586 ins_encode %{ 23587 int vector_len = 1; 23588 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 23589 %} 23590 ins_pipe( pipe_slow ); 23591 %} 23592 23593 // a * b + c 23594 instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{ 23595 predicate(UseFMA && n->as_Vector()->length() == 8); 23596 match(Set c (FmaVD c (Binary a b))); 23597 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 23598 ins_cost(150); 23599 ins_encode %{ 23600 int vector_len = 2; 23601 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 23602 %} 23603 ins_pipe( pipe_slow ); 23604 %} 23605 23606 // a * b + c 23607 instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{ 23608 predicate(UseFMA && n->as_Vector()->length() == 8); 23609 match(Set c (FmaVD c (Binary a (LoadVector b)))); 23610 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %} 23611 ins_cost(150); 23612 ins_encode %{ 23613 int vector_len = 2; 23614 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 23615 %} 23616 ins_pipe( pipe_slow ); 23617 %} 23618 23619 // a * b + c 23620 instruct vfma2F_reg(vecD a, vecD b, vecD c) %{ 23621 predicate(UseFMA && n->as_Vector()->length() == 2); 23622 match(Set c (FmaVF c (Binary a b))); 23623 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed2F" %} 23624 ins_cost(150); 23625 ins_encode %{ 23626 int vector_len = 0; 23627 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 23628 %} 23629 ins_pipe( pipe_slow ); 23630 %} 23631 23632 // a * b + c 23633 instruct vfma2F_mem(vecD a, memory b, vecD c) %{ 
23634 predicate(UseFMA && n->as_Vector()->length() == 2); 23635 match(Set c (FmaVF c (Binary a (LoadVector b)))); 23636 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed2F" %} 23637 ins_cost(150); 23638 ins_encode %{ 23639 int vector_len = 0; 23640 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 23641 %} 23642 ins_pipe( pipe_slow ); 23643 %} 23644 23645 // a * b + c 23646 instruct vfma4F_reg(vecX a, vecX b, vecX c) %{ 23647 predicate(UseFMA && n->as_Vector()->length() == 4); 23648 match(Set c (FmaVF c (Binary a b))); 23649 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} 23650 ins_cost(150); 23651 ins_encode %{ 23652 int vector_len = 0; 23653 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 23654 %} 23655 ins_pipe( pipe_slow ); 23656 %} 23657 23658 // a * b + c 23659 instruct vfma4F_mem(vecX a, memory b, vecX c) %{ 23660 predicate(UseFMA && n->as_Vector()->length() == 4); 23661 match(Set c (FmaVF c (Binary a (LoadVector b)))); 23662 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %} 23663 ins_cost(150); 23664 ins_encode %{ 23665 int vector_len = 0; 23666 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 23667 %} 23668 ins_pipe( pipe_slow ); 23669 %} 23670 23671 // a * b + c 23672 instruct vfma8F_reg(vecY a, vecY b, vecY c) %{ 23673 predicate(UseFMA && n->as_Vector()->length() == 8); 23674 match(Set c (FmaVF c (Binary a b))); 23675 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} 23676 ins_cost(150); 23677 ins_encode %{ 23678 int vector_len = 1; 23679 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 23680 %} 23681 ins_pipe( pipe_slow ); 23682 %} 23683 23684 // a * b + c 23685 instruct vfma8F_mem(vecY a, memory b, vecY c) %{ 23686 predicate(UseFMA && n->as_Vector()->length() == 8); 23687 match(Set c (FmaVF c (Binary a (LoadVector b)))); 23688 format %{ "fmaps 
$a,$b,$c\t# $c = $a * $b + $c fma packed8F" %} 23689 ins_cost(150); 23690 ins_encode %{ 23691 int vector_len = 1; 23692 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 23693 %} 23694 ins_pipe( pipe_slow ); 23695 %} 23696 23697 // a * b + c 23698 instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{ 23699 predicate(UseFMA && n->as_Vector()->length() == 16); 23700 match(Set c (FmaVF c (Binary a b))); 23701 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} 23702 ins_cost(150); 23703 ins_encode %{ 23704 int vector_len = 2; 23705 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len); 23706 %} 23707 ins_pipe( pipe_slow ); 23708 %} 23709 23710 // a * b + c 23711 instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{ 23712 predicate(UseFMA && n->as_Vector()->length() == 16); 23713 match(Set c (FmaVF c (Binary a (LoadVector b)))); 23714 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %} 23715 ins_cost(150); 23716 ins_encode %{ 23717 int vector_len = 2; 23718 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len); 23719 %} 23720 ins_pipe( pipe_slow ); 23721 %} 23722 23723 // --------------------------------- PopCount -------------------------------------- 23724 23725 instruct vpopcount2I(vecD dst, vecD src) %{ 23726 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2); 23727 match(Set dst (PopCountVI src)); 23728 format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %} 23729 ins_encode %{ 23730 int vector_len = 0; 23731 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23732 %} 23733 ins_pipe( pipe_slow ); 23734 %} 23735 23736 instruct vpopcount4I(vecX dst, vecX src) %{ 23737 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4); 23738 match(Set dst (PopCountVI src)); 23739 format %{ "vpopcntd $dst,$src\t! 
vector popcount packed4I" %} 23740 ins_encode %{ 23741 int vector_len = 0; 23742 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23743 %} 23744 ins_pipe( pipe_slow ); 23745 %} 23746 23747 instruct vpopcount8I(vecY dst, vecY src) %{ 23748 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8); 23749 match(Set dst (PopCountVI src)); 23750 format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %} 23751 ins_encode %{ 23752 int vector_len = 1; 23753 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23754 %} 23755 ins_pipe( pipe_slow ); 23756 %} 23757 23758 instruct vpopcount16I(vecZ dst, vecZ src) %{ 23759 predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16); 23760 match(Set dst (PopCountVI src)); 23761 format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %} 23762 ins_encode %{ 23763 int vector_len = 2; 23764 __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 23765 %} 23766 ins_pipe( pipe_slow ); 23767 %}